xref: /titanic_41/usr/src/cmd/lvm/metassist/layout/layout_stripe.c (revision 3d7072f8bd27709dba14f6fe336f149d25d9e207)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <string.h>
30 
31 #include <libintl.h>
32 
33 #include "volume_error.h"
34 #include "volume_devconfig.h"
35 #include "volume_dlist.h"
36 #include "volume_output.h"
37 
38 #include "layout_device_cache.h"
39 #include "layout_device_util.h"
40 #include "layout_discovery.h"
41 #include "layout_dlist_util.h"
42 #include "layout_messages.h"
43 #include "layout_request.h"
44 #include "layout_slice.h"
45 #include "layout_svm_util.h"
46 
47 #define	_LAYOUT_STRIPE_C
48 
49 static int compose_stripe(
50 	devconfig_t	*request,
51 	uint64_t	nbytes,
52 	dlist_t		*disks,
53 	int		max,
54 	int		min,
55 	dlist_t		*othervols,
56 	devconfig_t	**stripe);
57 
58 static int compose_stripe_within_hba(
59 	devconfig_t	*request,
60 	dlist_t		*hbas,
61 	uint64_t	nbytes,
62 	uint16_t	min,
63 	uint16_t	max,
64 	devconfig_t	**stripe);
65 
66 static int assemble_stripe(
67 	devconfig_t	*request,
68 	dlist_t		*comps,
69 	devconfig_t	**stripe);
70 
71 static dlist_t *
72 order_stripe_components_alternate_hbas(
73 	dlist_t *comps);
74 
75 static int compute_usable_stripe_capacity(
76 	dlist_t		*comps,
77 	uint64_t	ilace,
78 	uint64_t	*nbytes);
79 
80 /*
81  * FUNCTION:	layout_stripe(devconfig_t *request, uint64_t nbytes,
82  *			dlist_t **results)
83  *
84  * INPUT:	request	- pointer to a devconfig_t of the current request
85  *		nbytes	- the desired capacity of the stripe
86  *
87  * OUTPUT:	results	- pointer to a list of composed volumes
88  *
89  * RETURNS:	int	- 0 on success
90  *			 !0 otherwise.
91  *
92  * PURPOSE:	Main layout driver for composing stripe volumes.
93  *
94  *		Attempts to construct a stripe of size nbytes.
95  *
96  *		Basic goal of all strategies is to build wide-thin stripes:
97  *		build widest stripe possible across as many HBAs as possible.
98  *
99  *		Several different layout strategies are tried in order
100  *		of preference until one succeeds or there are none left.
101  *
102  *		1 - stripe across similar HBAs
103  *		    . number of components is driven by # of HBAs
104  *		    . requires mincomp available HBAs
105  *
106  *		2 - stripe within a single HBA
107  *		    . number of components is driven by # of disks
108  *		    . requires at least 1 HBA with mincomp disks
109  *
110  *		3 - stripe across all available disks on similar HBAs
111  *		    . number of components is driven by # of disks
112  *		    . requires at least mincomp disks
113  *
114  *		4 - stripe across all available HBAs
115  *		    . number of components is driven by # of HBAs
116  *		    . requires at least mincomp HBAs
117  *
118  *		5 - stripe across all available disks on all HBAs
119  *		    . number of components is driven by # of disks
120  *		    . requires at least mincomp disks
121  *
122  *		Each strategy tries to compose a stripe with the
123  *		maximum number of components first then reduces the
124  *		number of components down to mincomp.
125  *
126  *		get allowed minimum number of stripe components
127  *		get allowed maximum number of stripe components
128  *		get available HBAs
129  *
130  *		group HBAs by characteristics
131  *		for (each HBA grouping) and (stripe not composed) {
132  *		    select next HBA group
133  *		    for (strategy[1,2,3]) and (stripe not composed) {
134  *			compose stripe using HBAs in group
135  *		    }
136  *		}
137  *
138  *		if (stripe not composed) {
139  *		    for (strategy[4,5]) and (stripe not composed) {
140  *			compose stripe using all HBAs
141  *		    }
142  *		}
143  *
144  *		if (stripe composed) {
145  *		    append composed stripe to results
146  *		}
147  *
148  */
149 int
150 layout_stripe(
151 	devconfig_t	*request,
152 	uint64_t	nbytes,
153 	dlist_t		**results)
154 {
155 	/*
156 	 * these enums define the # of strategies and the preference order
157 	 * in which they are tried
158 	 */
159 	typedef enum {
160 		STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER = 0,
161 		STRIPE_WITHIN_SIMILAR_HBA,
162 		STRIPE_ACROSS_SIMILAR_HBAS,
163 		N_SIMILAR_HBA_STRATEGIES
164 	} similar_hba_strategy_order_t;
165 
166 	typedef enum {
167 		STRIPE_ACROSS_ANY_HBAS_DISK_PER = 0,
168 		STRIPE_ACROSS_ANY_HBAS,
169 		N_ANY_HBA_STRATEGIES
170 	} any_hba_strategy_order_t;
171 
172 
173 	dlist_t		*usable_hbas = NULL;
174 	dlist_t		*similar_hba_groups = NULL;
175 	dlist_t		*iter = NULL;
176 	devconfig_t	*stripe = NULL;
177 
178 	uint16_t	mincomp	= 0;
179 	uint16_t	maxcomp	= 0;
180 
181 	int		error = 0;
182 
183 	(error = get_usable_hbas(&usable_hbas));
184 	if (error != 0) {
185 	    return (error);
186 	}
187 
188 	print_layout_volume_msg(devconfig_type_to_str(TYPE_STRIPE), nbytes);
189 
190 	if (dlist_length(usable_hbas) == 0) {
191 	    print_no_hbas_msg();
192 	    volume_set_error(gettext("There are no usable HBAs."));
193 	    return (-1);
194 	}
195 
196 	((error = group_similar_hbas(usable_hbas, &similar_hba_groups)) != 0) ||
197 
198 	/*
199 	 * determine the min/max number of stripe components
200 	 * based on the request, the diskset defaults or the
201 	 * global defaults.  These are absolute limits, the
202 	 * actual values are determined by the number of HBAs
203 	 * and/or disks available.
204 	 */
205 	(error = get_stripe_min_comp(request, &mincomp)) ||
206 	(error = get_stripe_max_comp(request, &maxcomp));
207 	if (error != 0) {
208 	    return (error);
209 	}
210 
211 	for (iter = similar_hba_groups;
212 	    (error == 0) && (stripe == NULL) && (iter != NULL);
213 	    iter = iter->next) {
214 
215 	    dlist_t *hbas = (dlist_t *)iter->obj;
216 
217 	    similar_hba_strategy_order_t order;
218 
219 	    for (order = STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER;
220 		(order < N_SIMILAR_HBA_STRATEGIES) &&
221 			(stripe == NULL) && (error == 0);
222 		order++) {
223 
224 		dlist_t *selhbas = NULL;
225 		dlist_t	*disks = NULL;
226 		int	n = 0;
227 
228 		switch (order) {
229 
230 		case STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER:
231 
232 		    error = select_hbas_with_n_disks(
233 			    request, hbas, 1, &selhbas, &disks);
234 
235 		    if (error == 0) {
236 
237 /* BEGIN CSTYLED */
238 oprintf(OUTPUT_TERSE,
239 gettext("  -->Strategy 1: use 1 disk from %d-%d similar HBAs - stripe across HBAs\n"),
240 	mincomp, maxcomp);
241 /* END CSTYLED */
242 
243 			if ((n = dlist_length(selhbas)) >= mincomp) {
244 			    n = ((n > maxcomp) ? maxcomp : n);
245 			    error = compose_stripe(
246 				    request, nbytes, disks, n,
247 				    mincomp, NULL, &stripe);
248 			} else {
249 			    print_insufficient_hbas_msg(n);
250 			}
251 		    }
252 
253 		    break;
254 
255 		case STRIPE_WITHIN_SIMILAR_HBA:
256 
257 		    error = select_hbas_with_n_disks(
258 			    request, hbas, mincomp, &selhbas, &disks);
259 
260 		    if (error == 0) {
261 
262 /* BEGIN CSTYLED */
263 oprintf(OUTPUT_TERSE,
264 gettext("  -->Strategy 2: use %d-%d disks from any single HBA - stripe within HBA\n"),
265 	mincomp, maxcomp);
266 /* END CSTYLED */
267 
268 			if ((n = dlist_length(selhbas)) > 0) {
269 			    error = compose_stripe_within_hba(
270 				    request, selhbas, nbytes,
271 				    mincomp, maxcomp, &stripe);
272 			} else {
273 			    print_insufficient_disks_msg(n);
274 			}
275 		    }
276 
277 		    break;
278 
279 		case STRIPE_ACROSS_SIMILAR_HBAS:
280 
281 		    error = select_hbas_with_n_disks(
282 			    request, hbas, 1, &selhbas, &disks);
283 
284 		    if (error == 0) {
285 
286 /* BEGIN CSTYLED */
287 oprintf(OUTPUT_TERSE,
288 gettext("  -->Strategy 3: use %d-%d disks from %d similar HBAs - stripe across HBAs\n"),
289 	mincomp, maxcomp, dlist_length(hbas));
290 /* END CSTYLED */
291 
292 			if ((n = dlist_length(selhbas)) > 0) {
293 			    if ((n = dlist_length(disks)) >= mincomp) {
294 				n = ((n > maxcomp) ? maxcomp : n);
295 				error = compose_stripe(
296 					request, nbytes, disks, n,
297 					mincomp, NULL, &stripe);
298 			    } else {
299 				print_insufficient_disks_msg(n);
300 			    }
301 			} else {
302 			    print_insufficient_hbas_msg(n);
303 			}
304 		    }
305 
306 		    break;
307 
308 		default:
309 		    break;
310 		}
311 
312 		dlist_free_items(disks, NULL);
313 		dlist_free_items(selhbas, NULL);
314 	    }
315 	}
316 
317 	for (iter = similar_hba_groups; iter != NULL; iter = iter->next) {
318 	    dlist_free_items((dlist_t *)iter->obj, NULL);
319 	}
320 	dlist_free_items(similar_hba_groups, NULL);
321 
322 	/*
323 	 * if striping within similar HBA groups failed,
324 	 * try across all available HBAs
325 	 */
326 	if ((stripe == NULL) && (error == 0)) {
327 
328 	    any_hba_strategy_order_t order;
329 
330 	    for (order = STRIPE_ACROSS_ANY_HBAS_DISK_PER;
331 		(order < N_ANY_HBA_STRATEGIES) &&
332 			(stripe == NULL) && (error == 0);
333 		order++) {
334 
335 		dlist_t	*selhbas = NULL;
336 		dlist_t	*disks = NULL;
337 		int	n = 0;
338 
339 		switch (order) {
340 
341 		case STRIPE_ACROSS_ANY_HBAS_DISK_PER:
342 
343 		    error = select_hbas_with_n_disks(
344 			    request, usable_hbas, 1, &selhbas, &disks);
345 
346 		    if (error == 0) {
347 
348 /* BEGIN CSTYLED */
349 oprintf(OUTPUT_TERSE,
350 gettext("  -->Strategy 4: use 1 disk from %d-%d available HBAs - stripe across any HBAs\n"),
351 	mincomp, maxcomp);
352 /* END CSTYLED */
353 
354 			if ((n = dlist_length(selhbas)) >= mincomp) {
355 
356 			    n = ((n > maxcomp) ? maxcomp : n);
357 			    error = compose_stripe(
358 				    request, nbytes, disks, n,
359 				    mincomp, NULL, &stripe);
360 
361 			} else {
362 			    print_insufficient_hbas_msg(n);
363 			}
364 		    }
365 
366 		    break;
367 
368 		case STRIPE_ACROSS_ANY_HBAS:
369 
370 		    error = select_hbas_with_n_disks(
371 			    request, usable_hbas, 1, &selhbas, &disks);
372 
373 		    if (error == 0) {
374 
375 /* BEGIN CSTYLED */
376 oprintf(OUTPUT_TERSE,
377 gettext("  -->Strategy 5: use %d-%d disks from %d available HBA - stripe across any HBAs\n"),
378 	mincomp, maxcomp, dlist_length(selhbas));
379 /* END CSTYLED */
380 
381 			if ((n = dlist_length(disks)) >= mincomp) {
382 
383 			    n = ((n > maxcomp) ? maxcomp : n);
384 			    error = compose_stripe(
385 				    request, nbytes, disks, n,
386 				    mincomp, NULL, &stripe);
387 
388 			} else {
389 			    print_insufficient_disks_msg(n);
390 			}
391 		    }
392 
393 		    break;
394 		}
395 
396 		dlist_free_items(disks, NULL);
397 		dlist_free_items(selhbas, NULL);
398 	    }
399 	}
400 
401 	if (stripe != NULL) {
402 
403 	    dlist_t *item = NULL;
404 	    if ((item = dlist_new_item(stripe)) == NULL) {
405 		error = ENOMEM;
406 	    } else {
407 		*results = dlist_append(item, *results, AT_TAIL);
408 		print_layout_success_msg();
409 	    }
410 
411 	} else if (error != 0) {
412 
413 	    print_debug_failure_msg(
414 		    devconfig_type_to_str(TYPE_STRIPE),
415 		    get_error_string(error));
416 
417 	} else {
418 
419 	    print_insufficient_resources_msg(
420 		    devconfig_type_to_str(TYPE_STRIPE));
421 	    error = -1;
422 	}
423 
424 	return (error);
425 }
426 
427 /*
428  * FUNCTION:	populate_stripe(devconfig_t *request, uint64_t nbytes,
429  *			dlist_t *disks, uint16_t ncomp, dlist_t *othervols,
430  *			devconfig_t **stripe)
431  *
432  * INPUT:	request	- pointer to a request devconfig_t
433  *		nbytes	- desired stripe size
434  *		disks	- pointer to a list of available disks
435  *		ncomp	- number of components desired
436  *		othervols - pointer to a list of other volumes whose
437  *				composition may affect this stripe
438  *				(e.g., submirrors of the same mirror)
439  *
440  * OUTPUT:	stripe	- pointer to a devconfig_t to hold resulting stripe
441  *
442  * RETURNS:	int	- 0 on success
443  *			 !0 otherwise.
444  *
445  * PURPOSE:	Helper to populate a stripe with the specified number of
446  *		components and aggregate capacity using slices on disks
447  *		in the input list.
448  *
449  *		If the othervols list is not empty, the slice components
450  *		chosen for the stripe must not be on the same disks as any
451  *		of the other volumes.
452  *
453  *		If sufficient slice components can be found, the stripe
454  *		is assembled and returned.
455  */
456 int
457 populate_stripe(
458 	devconfig_t	*request,
459 	uint64_t	nbytes,
460 	dlist_t		*disks,
461 	uint16_t	ncomp,
462 	dlist_t		*othervols,
463 	devconfig_t	**stripe)
464 {
465 	uint16_t	npaths = 0;
466 	uint16_t	ncomps = 0;	/* number of components found */
467 	uint64_t	rsize = 0;	/* reqd component size */
468 
469 	dlist_t		*other_hbas = NULL;
470 	dlist_t		*other_disks = NULL;
471 
472 	dlist_t		*slices = NULL;
473 	dlist_t		*comps = NULL;
474 
475 	int		error = 0;
476 
477 	*stripe = NULL;
478 
479 	((error = disks_get_avail_slices(request, disks, &slices)) != 0) ||
480 	(error = get_volume_npaths(request, &npaths));
481 	if (error != 0) {
482 	    return (error);
483 	}
484 
485 	print_populate_volume_ncomps_msg(
486 		devconfig_type_to_str(TYPE_STRIPE), nbytes, ncomp);
487 
488 	if (slices == NULL) {
489 	    print_populate_no_slices_msg();
490 	    return (0);
491 	}
492 
493 	/* determine HBAs and disks used by othervols */
494 	error = get_hbas_and_disks_used_by_volumes(othervols,
495 		&other_hbas, &other_disks);
496 	if (error != 0) {
497 	    dlist_free_items(other_hbas, NULL);
498 	    dlist_free_items(other_disks, NULL);
499 	    return (error);
500 	}
501 
502 	print_populate_choose_slices_msg();
503 
504 	/*
505 	 * each stripe component needs to be this size.
506 	 * Note that the stripe interlace doesn't need to be
507 	 * taken into account in this computation because any
508 	 * slice selected as a stripe component will be oversized
509 	 * to account for interlace and cylinder rounding done
510 	 * by libmeta.
511 	 */
512 	rsize = nbytes / ncomp;
513 
514 	/*
515 	 * need to select 'ncomp' slices that are at least 'rsize'
516 	 * large in order to reach the desired capacity.
517 	 */
518 	ncomps = 0;
519 	while ((ncomps < ncomp) && (error == 0)) {
520 
521 	    devconfig_t	*comp = NULL;
522 	    dlist_t	*item = NULL;
523 	    dlist_t	*rmvd = NULL;
524 	    char	*cname = NULL;
525 
526 	    /* BEGIN CSTYLED */
527 	    /*
528 	     * 1st B_TRUE: require a different disk than those used by
529 	     *		comps and othervols
530 	     * 2nd B_TRUE: requested size is minimum acceptable
531 	     * 3rd B_TRUE: add an extra cylinder to the resulting slice, this is
532 	     *		necessary for Stripe components whose sizes get rounded
533 	     *		down to an interlace multiple and then down to a cylinder
534 	     *		boundary.
535 	     */
536 	    /* END CSTYLED */
537 	    error = choose_slice(rsize, npaths, slices, comps,
538 		    other_hbas, other_disks, B_TRUE, B_TRUE, B_TRUE, &comp);
539 
540 	    if ((error == 0) && (comp != NULL)) {
541 
542 		++ncomps;
543 
544 		item = dlist_new_item(comp);
545 		if (item == NULL) {
546 		    error = ENOMEM;
547 		} else {
548 
549 		    /* add selected component to comp list */
550 		    comps = dlist_insert_ordered(
551 			    item,
552 			    comps,
553 			    ASCENDING,
554 			    compare_devconfig_sizes);
555 
556 		    /* remove it from the available list */
557 		    slices = dlist_remove_equivalent_item(slices, (void *) comp,
558 			    compare_devconfig_and_descriptor_names, &rmvd);
559 
560 		    if (rmvd != NULL) {
561 			free(rmvd);
562 		    }
563 
564 		    /* add the component slice to the used list */
565 		    if ((error = devconfig_get_name(comp, &cname)) == 0) {
566 			error = add_used_slice_by_name(cname);
567 		    }
568 		}
569 	    } else if (comp == NULL) {
570 		/* no possible slice */
571 		break;
572 	    }
573 	}
574 
575 	dlist_free_items(slices, NULL);
576 	dlist_free_items(other_hbas, NULL);
577 	dlist_free_items(other_disks, NULL);
578 
579 	if (ncomps == ncomp) {
580 
581 	    if ((error = assemble_stripe(request, comps, stripe)) == 0) {
582 		print_populate_success_msg();
583 	    } else {
584 		dlist_free_items(comps, free_devconfig_object);
585 	    }
586 
587 	} else if (error == 0) {
588 
589 	    if (ncomps > 0) {
590 		print_insufficient_components_msg(ncomps);
591 		dlist_free_items(comps, free_devconfig_object);
592 	    } else {
593 		print_populate_no_slices_msg();
594 	    }
595 
596 	}
597 	return (error);
598 }
599 
600 /*
601  * FUNCTION:	populate_explicit_stripe(devconfig_t *request,
602  *			dlist_t **results)
603  *
604  * INPUT:	request	- pointer to a request devconfig_t
605  *
606  * OUTPUT:	results	- pointer to a list of volume devconfig_t results
607  *
608  * RETURNS:	int	- 0 on success
609  *			 !0 otherwise.
610  *
611  * PURPOSE:	Processes the input stripe request that specifies explicit
612  *		slice components.
613  *
614  *		The components have already been validated and reserved,
615  *		all that is required is to create devconfig_t structs
616  *		for each requested slice.
617  *
618  *		The net size of the stripe is determined by the slice
619  *		components.
620  *
621  *		The stripe devconfig_t is assembled and appended to the
622  *		results list.
623  *
624  *		This function is also called from
625  *		    layout_mirror.populate_explicit_mirror()
626  */
627 int
628 populate_explicit_stripe(
629 	devconfig_t	*request,
630 	dlist_t		**results)
631 {
632 	devconfig_t	*stripe = NULL;
633 	int		error = 0;
634 
635 	dlist_t		*comps = NULL;
636 	dlist_t		*iter = NULL;
637 	dlist_t		*item = NULL;
638 
639 	print_layout_explicit_msg(devconfig_type_to_str(TYPE_STRIPE));
640 
641 	/* assemble components */
642 	iter = devconfig_get_components(request);
643 	for (; (iter != NULL) && (error == 0); iter = iter->next) {
644 	    devconfig_t	*rqst = (devconfig_t *)iter->obj;
645 	    dm_descriptor_t rqst_slice = NULL;
646 	    char	*rqst_name = NULL;
647 	    devconfig_t	*comp = NULL;
648 
649 	    /* slice components have been validated */
650 	    /* turn each into a devconfig_t */
651 	    ((error = devconfig_get_name(rqst, &rqst_name)) != 0) ||
652 	    (error = slice_get_by_name(rqst_name, &rqst_slice)) ||
653 	    (error = create_devconfig_for_slice(rqst_slice, &comp));
654 
655 	    if (error == 0) {
656 
657 		print_layout_explicit_added_msg(rqst_name);
658 
659 		item = dlist_new_item((void *)comp);
660 		if (item == NULL) {
661 		    error = ENOMEM;
662 		} else {
663 		    comps = dlist_append(item, comps, AT_TAIL);
664 		}
665 	    }
666 	}
667 
668 	if (error == 0) {
669 	    error = assemble_stripe(request, comps, &stripe);
670 	}
671 
672 	if (error == 0) {
673 	    if ((item = dlist_new_item(stripe)) == NULL) {
674 		error = ENOMEM;
675 	    } else {
676 		*results = dlist_append(item, *results, AT_TAIL);
677 		print_populate_success_msg();
678 	    }
679 	} else {
680 	    dlist_free_items(comps, free_devconfig);
681 	}
682 
683 	return (error);
684 }
685 
686 /*
687  * FUNCTION:	compose_stripe(devconfig_t *request, uint64_t nbytes,
688  *			dlist_t *disks, int max, int min,
689  *			dlist_t *othervols, devconfig_t **stripe)
690  *
691  * INPUT:	request	- pointer to a request devconfig_t
692  *		nbytes	- desired stripe size
693  *		disks	- pointer to a list of available disks
694  *		max	- maximum number of components allowed
695  *		min	- minimum number of components allowed
696  *		othervols - pointer to a list of other volumes whose
697  *				composition may affect this stripe
698  *				(e.g., submirrors of the same mirror)
699  *
700  * OUTPUT:	stripe	- pointer to a devconfig_t to hold resulting stripe
701  *
702  * RETURNS:	int	- 0 on success
703  *			 !0 otherwise.
704  *
705  * PURPOSE:	Attempt to compose a stripe of capacity nbytes, with
706  *		component slices chosen from the input list of disks.
707  *		The number of components in the stripe should be in the
708  *		range min <= N <= max, more components are preferred.
709  *
710  *		If a stripe can be composed, a pointer to it will be
711  *		returned in the stripe devconfig_t.
712  *
713  *		This is a loop wrapped around populate_stripe which
714  *		varies the number of components between 'max' and 'min'.
715  */
716 static int
717 compose_stripe(
718 	devconfig_t	*request,
719 	uint64_t	nbytes,
720 	dlist_t		*disks,
721 	int		max,
722 	int		min,
723 	dlist_t		*othervols,
724 	devconfig_t	**stripe)
725 {
726 	int		error = 0;
727 
728 	*stripe = NULL;
729 
730 	for (; (error == 0) && (*stripe == NULL) && (max >= min); max--) {
731 	    error = populate_stripe(
732 		    request, nbytes, disks, max, othervols, stripe);
733 	}
734 
735 	return (error);
736 }
737 
738 /*
739  * FUNCTION:	compose_stripe_within_hba(devconfig_t *request,
740  *			dlist_t *hbas, uint64_t nbytes,
741  *			uint16_t min, uint16_t max, devconfig_t **stripe)
742  *
743  * INPUT:	request	- pointer to a devconfig_t of the current request
744  *		hbas	- pointer to a list of available HBAs
745  *		nbytes	- the desired capacity for the stripe
746  *		min	- the minimum number of stripe components
747  *		max	- the maximum number of stripe components
748  *
749  * OUTPUT:	stripe	- pointer to a stripe devconfig_t result
750  *
751  * RETURNS:	int	- 0 on success
752  *			 !0 otherwise.
753  *
754  * PURPOSE:	Layout function which composes a stripe of the desired size
755  *		using available disks within any single HBA from the input list.
756  *
757  *		The number of components within the composed stripe will be
758  *		in the range of min to max, preferring more components
759  *		over fewer.
760  *
761  * 		All input HBAs are expected to have at least mincomp
762  *		available disks and total space sufficient for the stripe.
763  *
764  *		If the stripe can be composed, a pointer to it is returned in
765  *		the stripe devconfig_t *.
766  *
767  *
768  *		while (more hbas and stripe not composed) {
769  *		    select HBA
770  *		    if (not enough available space on this HBA) {
771  *			continue;
772  *		    }
773  *		    get available disks for HBA
774  *		    use # disks as max # of stripe components
775  *		    try to compose stripe
776  *		}
777  *
778  */
779 static int
780 compose_stripe_within_hba(
781 	devconfig_t	*request,
782 	dlist_t		*hbas,
783 	uint64_t	nbytes,
784 	uint16_t	min,
785 	uint16_t	max,
786 	devconfig_t	**stripe)
787 {
788 	int		error = 0;
789 
790 	dlist_t		*iter = NULL;
791 
792 	*stripe = NULL;
793 
794 	for (iter = hbas;
795 	    (iter != NULL) && (error == 0) && (*stripe == NULL);
796 	    iter = iter->next) {
797 
798 	    dm_descriptor_t hba = (uintptr_t)iter->obj;
799 	    dlist_t	*disks = NULL;
800 	    uint64_t	space = 0;
801 	    uint16_t	ncomp = 0;
802 	    char	*name;
803 
804 	    ((error = get_display_name(hba, &name)) != 0) ||
805 	    (error = hba_get_avail_disks_and_space(request,
806 		    hba, &disks, &space));
807 
808 	    if (error == 0) {
809 		if (space >= nbytes) {
810 		    ncomp = dlist_length(disks);
811 		    ncomp = ((ncomp > max) ? max : ncomp);
812 		    error = compose_stripe(
813 			    request, nbytes, disks, ncomp,
814 			    min, NULL, stripe);
815 		} else {
816 		    print_hba_insufficient_space_msg(name, space);
817 		}
818 	    }
819 
820 	    dlist_free_items(disks, NULL);
821 	}
822 
823 	return (error);
824 }
825 
826 /*
827  * FUNCTION:	assemble_stripe(devconfig_t *request, dlist_t *comps,
828  *			devconfig_t **stripe)
829  *
830  * INPUT:	request	- pointer to a devconfig_t of the current request
831  *		comps	- pointer to a list of slice components
832  *
833  * OUTPUT:	stripe	- pointer to a devconfig_t to hold final stripe
834  *
835  * RETURNS:	int	- 0 on success
836  *			 !0 otherwise.
837  *
838  * PURPOSE:	Helper which creates and populates a stripe devconfig_t
839  *		struct using information from the input request and the
840  *		list of slice components.
841  *
842  *		Determines the name of the stripe either from the request
843  *		or from the default naming scheme.
844  *
845  *		Sets the interlace for the stripe if a value is specified
846  *		in the request.
847  *
848  *		Attaches the input list of components to the devconfig.
849  */
850 static int
851 assemble_stripe(
852 	devconfig_t	*request,
853 	dlist_t		*comps,
854 	devconfig_t	**stripe)
855 {
856 	uint64_t ilace = 0;
857 	char	*name = NULL;
858 	int	error = 0;
859 
860 	if ((error = new_devconfig(stripe, TYPE_STRIPE)) == 0) {
861 	    /* set stripe name, use requested name if specified */
862 	    if ((error = devconfig_get_name(request, &name)) != 0) {
863 		if (error != ERR_ATTR_UNSET) {
864 		    volume_set_error(gettext("error getting requested name\n"));
865 		} else {
866 		    error = 0;
867 		}
868 	    }
869 
870 	    if (error == 0) {
871 		if (name == NULL) {
872 		    if ((error = get_next_volume_name(&name,
873 			TYPE_STRIPE)) == 0) {
874 			error = devconfig_set_name(*stripe, name);
875 			free(name);
876 		    }
877 		} else {
878 		    error = devconfig_set_name(*stripe, name);
879 		}
880 	    }
881 	}
882 
883 	if (error == 0) {
884 	    if ((error = get_stripe_interlace(request, &ilace)) == 0) {
885 		error = devconfig_set_stripe_interlace(*stripe, ilace);
886 	    } else if (error == ENOENT) {
887 		ilace = get_default_stripe_interlace();
888 		error = 0;
889 	    }
890 	}
891 
892 	if (error == 0) {
893 	    uint64_t	nbytes = 0;
894 	    if ((error = compute_usable_stripe_capacity(comps,
895 		ilace, &nbytes)) == 0) {
896 		error = devconfig_set_size_in_blocks(*stripe, nbytes/DEV_BSIZE);
897 	    }
898 	}
899 
900 	if (error == 0) {
901 	    comps = order_stripe_components_alternate_hbas(comps);
902 	    devconfig_set_components(*stripe, comps);
903 	} else {
904 	    free_devconfig(*stripe);
905 	    *stripe = NULL;
906 	}
907 
908 	return (error);
909 }
910 
911 /*
912  * Order the given stripe component list such that the number of
913  * slices on the same hba adjacent to each other in the list are
914  * minimized.
915  *
916  * @param       comps
917  *              the slice component list to order
918  *
919  * @return      the first element of the resulting list
920  */
921 static dlist_t *
922 order_stripe_components_alternate_hbas(
923 	dlist_t *comps)
924 {
925 	dlist_t *iter;
926 
927 	oprintf(OUTPUT_DEBUG,
928 	    gettext("Stripe components before ordering to alternate HBAs:\n"));
929 
930 	for (iter = comps; iter != NULL; iter = iter->next) {
931 	    devconfig_t *slice = (devconfig_t *)(iter->obj);
932 	    char *name;
933 	    devconfig_get_name(slice, &name);
934 	    oprintf(OUTPUT_DEBUG, "  %s\n", name);
935 	}
936 
937 	return (dlist_separate_similar_elements(
938 	    comps, compare_slices_on_same_hba));
939 }
940 
941 /*
942  * FUNCTION:	compute_usable_stripe_capacity(dlist_t *comps, uint64_t ilace,
943  *			uint64_t *nbytes)
944  *
945  * INPUT:	comps	- pointer to a list of stripe components
946  *		ilace	- the expected stripe interlace in bytes
947  *
948  * OUTPUT:	nbytes	- pointer to hold the computed capacity
949  *
950  * RETURNS:	int	- 0 on success
951  *			 !0 otherwise.
952  *
953  * PURPOSE:	Helper which computes the usable size of a stripe taking
954  *		into account the interlace and cylinder rounding that
955  *		libmeta uses: a stripe component's size is rounded down to
956  *		an integral multiple of the interlace and then rounded down
957  *		to a cylinder boundary on VTOC labeled disks.
958  *
959  *		(These libmeta computations are in the meta_stripe_attach()
960  *		 function of .../lib/lvm/libmeta/common/meta_stripe.c and
961  *		 meta_adjust_geom() in .../lib/lvm/libmeta/common/meta_init.c)
962  *
963  *		This function's implementation iterates the input list of
964  *		stripe component slices and determines the smallest usable
965  *		component capacity.
966  *
967  *		The usable stripe capacity is then that component capacity
968  *		times the number of components.
969  */
970 static int
971 compute_usable_stripe_capacity(
972 	dlist_t		*comps,
973 	uint64_t	ilace,
974 	uint64_t	*nbytes)
975 {
976 	uint64_t	bytes_per_component = 0;
977 	dlist_t		*iter;
978 	int		ncomps = 0;
979 	int		error = 0;
980 
981 	for (iter = comps; (iter != NULL) && (error == 0); iter = iter->next) {
982 
983 	    devconfig_t		*comp = (devconfig_t *)iter->obj;
984 	    char		*comp_name = NULL;
985 	    uint64_t 		comp_nbytes = 0;
986 	    dm_descriptor_t	comp_disk;
987 	    boolean_t		comp_disk_efi = B_FALSE;
988 	    uint64_t 		comp_disk_bps = 0; /* disk bytes per sector */
989 
990 	    ((error = devconfig_get_size(comp, &comp_nbytes)) != 0) ||
991 	    (error = devconfig_get_name(comp, &comp_name)) ||
992 	    (error = get_disk_for_named_slice(comp_name, &comp_disk)) ||
993 	    (error = disk_get_blocksize(comp_disk, &comp_disk_bps)) ||
994 	    (error = disk_get_is_efi(comp_disk, &comp_disk_efi));
995 	    if (error == 0) {
996 
997 		if (comp_disk_efi == B_FALSE) {
998 		    uint64_t	nhead = 0;
999 		    uint64_t	nsect = 0;
1000 		    uint64_t	ncyls = 0;
1001 
1002 		    /* do cylinder and interlace rounding for non-EFI disks */
1003 		    ((error = disk_get_ncylinders(comp_disk, &ncyls)) != 0) ||
1004 		    (error = disk_get_nheads(comp_disk, &nhead)) ||
1005 		    (error = disk_get_nsectors(comp_disk, &nsect));
1006 		    if (error == 0) {
1007 			/* compute bytes per cyl */
1008 			uint64_t bpc = nhead * nsect * comp_disk_bps;
1009 
1010 			/* round nbytes down to a multiple of interlace */
1011 			comp_nbytes = (comp_nbytes / ilace) * ilace;
1012 
1013 			/* round nbytes down to a cylinder boundary */
1014 			comp_nbytes = (comp_nbytes / bpc) * bpc;
1015 		    }
1016 		}
1017 
1018 		/* save smallest component size */
1019 		if ((bytes_per_component == 0) ||
1020 		    (comp_nbytes < bytes_per_component)) {
1021 		    bytes_per_component = comp_nbytes;
1022 		}
1023 
1024 		++ncomps;
1025 	    }
1026 	}
1027 
1028 	if (error == 0) {
1029 	    /* size of stripe = smallest component size * n components */
1030 	    *nbytes = (bytes_per_component * ncomps);
1031 	}
1032 
1033 	return (error);
1034 }
1035