1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Just in case we're not in a build environment, make sure that
28 * TEXT_DOMAIN gets set to something.
29 */
30 #if !defined(TEXT_DOMAIN)
31 #define TEXT_DOMAIN "SYS_TEST"
32 #endif
33
34 /*
35 * soft partition operations
36 *
37 * Soft Partitions provide a virtual disk mechanism which is used to
38 * divide a large volume into many small pieces, each appearing as a
39 * separate device. A soft partition consists of a series of extents,
40 * each having an offset and a length. The extents are logically
41 * contiguous, so where the first extent leaves off the second extent
42 * picks up. Which extent a given "virtual offset" belongs to is
43 * dependent on the size of all the previous extents in the soft
44 * partition.
45 *
46 * Soft partitions are represented in memory by an extent node
47 * (sp_ext_node_t) which contains all of the information necessary to
48 * create a unit structure and update the on-disk format, called
49 * "watermarks". These extent nodes are typically kept in a doubly
50 * linked list and are manipulated by list manipulation routines. A
51 * list of extents may represent all of the soft partitions on a volume,
52 * a single soft partition, or perhaps just a set of extents that need
 * to be updated. Extent lists may be sorted by offset or by name/seq#,
54 * depending on which compare function is used. Most of the routines
55 * require the list be sorted by offset to work, and that's the typical
56 * configuration.
57 *
58 * In order to do an allocation, knowledge of all soft partitions on the
59 * volume is required. Then free space is determined from the space
60 * that is not allocated, and new allocations can be made from the free
61 * space. Once the new allocations are made, a unit structure is created
62 * and the watermarks are updated. The status is then changed to "okay"
63 * on the unit structure to commit the transaction. If updating the
64 * watermarks fails, the unit structure is in an intermediate state and
65 * the driver will not allow access to the device.
66 *
67 * A typical sequence of events is:
68 * 1. Fetch the list of names for all soft partitions on a volume
69 * meta_sp_get_by_component()
70 * 2. Construct an extent list from the name list
71 * meta_sp_extlist_from_namelist()
72 * 3. Fill the gaps in the extent list with free extents
73 * meta_sp_list_freefill()
74 * 4. Allocate from the free extents
75 * meta_sp_alloc_by_len()
76 * meta_sp_alloc_by_list()
77 * 5. Create the unit structure from the extent list
78 * meta_sp_createunit()
79 * meta_sp_updateunit()
80 * 6. Write out the watermarks
81 * meta_sp_update_wm()
82 * 7. Set the status to "Okay"
83 * meta_sp_setstatus()
84 *
85 */
86
87 #include <stdio.h>
88 #include <meta.h>
89 #include "meta_repartition.h"
90 #include <sys/lvm/md_sp.h>
91 #include <sys/lvm/md_crc.h>
92 #include <strings.h>
93 #include <sys/lvm/md_mirror.h>
94 #include <sys/bitmap.h>
95
96 extern int md_in_daemon;
97
98 typedef struct sp_ext_node {
99 struct sp_ext_node *ext_next; /* next element */
100 struct sp_ext_node *ext_prev; /* previous element */
101 sp_ext_type_t ext_type; /* type of extent */
102 sp_ext_offset_t ext_offset; /* starting offset */
103 sp_ext_length_t ext_length; /* length of this node */
104 uint_t ext_flags; /* extent flags */
105 uint32_t ext_seq; /* watermark seq no */
106 mdname_t *ext_namep; /* name pointer */
107 mdsetname_t *ext_setp; /* set pointer */
108 } sp_ext_node_t;
109
110 /* extent flags */
111 #define EXTFLG_UPDATE (1)
112
113 /* Extent node compare function for list sorting */
114 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *);
115
116
117 /* Function Prototypes */
118
119 /* Debugging Functions */
120 static void meta_sp_debug(char *format, ...);
121 static void meta_sp_printunit(mp_unit_t *mp);
122
123 /* Misc Support Functions */
124 int meta_sp_parsesize(char *s, sp_ext_length_t *szp);
125 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp);
126 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp,
127 md_error_t *ep);
128 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp,
129 mdnamelist_t **nlpp, int force, md_error_t *ep);
130 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp,
131 mdname_t *compnp, md_error_t *ep);
132
133 /* Extent List Manipulation Functions */
134 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2);
135 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2);
136 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np,
137 sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length,
138 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare);
139 static void meta_sp_list_free(sp_ext_node_t **head);
140 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext);
141 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head,
142 sp_ext_type_t exttype, int exclude_wm);
143 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head,
144 sp_ext_offset_t offset);
145 static void meta_sp_list_freefill(sp_ext_node_t **extlist,
146 sp_ext_length_t size);
147 static void meta_sp_list_dump(sp_ext_node_t *head);
148 static int meta_sp_list_overlaps(sp_ext_node_t *head);
149
150 /* Extent List Query Functions */
151 static boolean_t meta_sp_enough_space(int desired_number_of_sps,
152 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp,
153 sp_ext_length_t alignment);
154 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep,
155 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp,
156 md_error_t *ep);
157 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep,
158 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp);
159
160
161 /* Extent Allocation Functions */
162 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np,
163 sp_ext_node_t **extlist, sp_ext_node_t *free_ext,
164 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq);
165 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np,
166 sp_ext_node_t **extlist, sp_ext_length_t *lp,
167 sp_ext_offset_t last_off, sp_ext_length_t alignment);
168 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np,
169 sp_ext_node_t **extlist, sp_ext_node_t *oblist);
170
171 /* Extent List Population Functions */
172 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp,
173 sp_ext_node_t **extlist, md_error_t *ep);
174 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp,
175 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep);
176
177 /* Print (metastat) Functions */
178 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp,
179 mdprtopts_t options, md_error_t *ep);
180 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate);
181 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp,
182 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep);
183
184 /* Watermark Manipulation Functions */
185 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp,
186 sp_ext_node_t *extlist, md_error_t *ep);
187 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep);
188 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp,
189 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep);
190 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp,
191 md_error_t *ep);
192
193 /* Unit Structure Manipulation Functions */
194 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist);
195 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp,
196 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len,
197 sp_status_t status, md_error_t *ep);
198 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un,
199 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts,
200 md_error_t *ep);
201 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist,
202 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep);
203 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options,
204 int *repart_options, md_error_t *ep);
205
206 /* Reset (metaclear) Functions */
207 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp,
208 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep);
209
210 /* Recovery (metarecover) Functions */
211 static void meta_sp_display_exthdr(void);
212 static void meta_sp_display_ext(sp_ext_node_t *ext);
213 static int meta_sp_checkseq(sp_ext_node_t *extlist);
214 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *,
215 mdname_t **, md_error_t *);
216 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np,
217 mdcmdopts_t options, md_error_t *ep);
218 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp,
219 mdcmdopts_t options, md_error_t *ep);
220 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np,
221 mdcmdopts_t options, md_error_t *ep);
222 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext,
223 sp_ext_node_t *unitext, md_error_t *ep);
224 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp,
225 mdcmdopts_t options, md_error_t *ep);
226 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np,
227 mdcmdopts_t options, md_error_t *ep);
228
229 /*
230 * Private Constants
231 */
232
233 static const int FORCE_RELOAD_CACHE = 1;
234 static const uint_t NO_FLAGS = 0;
235 static const sp_ext_offset_t NO_OFFSET = 0ULL;
236 static const uint_t NO_SEQUENCE_NUMBER = 0;
237 static const int ONE_SOFT_PARTITION = 1;
238
239 static unsigned long *sp_parent_printed[MD_MAXSETS];
240
241 #define TEST_SOFT_PARTITION_NAMEP NULL
242 #define TEST_SETNAMEP NULL
243
244 #define EXCLUDE_WM (1)
245 #define INCLUDE_WM (0)
246
247 #define SP_UNALIGNED (0LL)
248
249 /*
250 * **************************************************************************
251 * Debugging Functions *
252 * **************************************************************************
253 */
254
255 /*PRINTFLIKE1*/
256 static void
meta_sp_debug(char * format,...)257 meta_sp_debug(char *format, ...)
258 {
259 static int debug;
260 static int debug_set = 0;
261 va_list ap;
262
263 if (!debug_set) {
264 debug = getenv(META_SP_DEBUG) ? 1 : 0;
265 debug_set = 1;
266 }
267
268 if (debug) {
269 va_start(ap, format);
270 (void) vfprintf(stderr, format, ap);
271 va_end(ap);
272 }
273 }
274
275 static void
meta_sp_printunit(mp_unit_t * mp)276 meta_sp_printunit(mp_unit_t *mp)
277 {
278 int i;
279
280 if (mp == NULL)
281 return;
282
283 /* print the common fields we know about */
284 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type);
285 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size);
286 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp));
287
288 /* sp-specific fields */
289 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status);
290 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts);
291 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length);
292 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev);
293 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev);
294 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key);
295
296 /* print extent information */
297 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n");
298 for (i = 0; i < mp->un_numexts; i++) {
299 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i,
300 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff,
301 mp->un_ext[i].un_len);
302 }
303 }
304
305 /*
306 * FUNCTION: meta_sp_parsesize()
307 * INPUT: s - the string to parse
308 * OUTPUT: *szp - disk block count (0 for "all")
309 * RETURNS: -1 for error, 0 for success
310 * PURPOSE: parses the command line parameter that specifies the
311 * requested size of a soft partition. The input string
312 * is either the literal "all" or a numeric value
313 * followed by a single character, b for disk blocks, k
314 * for kilobytes, m for megabytes, g for gigabytes, or t
315 * for terabytes. p for petabytes and e for exabytes
316 * have been added as undocumented features for future
317 * expansion. For example, 100m is 100 megabytes, while
318 * 50g is 50 gigabytes. All values are rounded up to the
319 * nearest block size.
320 */
321 int
meta_sp_parsesize(char * s,sp_ext_length_t * szp)322 meta_sp_parsesize(char *s, sp_ext_length_t *szp)
323 {
324 if (s == NULL || szp == NULL) {
325 return (-1);
326 }
327
328 /* Check for literal "all" */
329 if (strcasecmp(s, "all") == 0) {
330 *szp = 0;
331 return (0);
332 }
333
334 return (meta_sp_parsesizestring(s, szp));
335 }
336
337 /*
338 * FUNCTION: meta_sp_parsesizestring()
339 * INPUT: s - the string to parse
340 * OUTPUT: *szp - disk block count
341 * RETURNS: -1 for error, 0 for success
342 * PURPOSE: parses a string that specifies size. The input string is a
343 * numeric value followed by a single character, b for disk blocks,
344 * k for kilobytes, m for megabytes, g for gigabytes, or t for
345 * terabytes. p for petabytes and e for exabytes have been added
346 * as undocumented features for future expansion. For example,
347 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values
348 * are rounded up to the nearest block size.
349 */
350 static int
meta_sp_parsesizestring(char * s,sp_ext_length_t * szp)351 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp)
352 {
353 sp_ext_length_t len = 0;
354 char len_type[2];
355
356 if (s == NULL || szp == NULL) {
357 return (-1);
358 }
359
360 /*
361 * make sure block offset does not overflow 2^64 bytes.
362 */
363 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) ||
364 (len == 0LL) ||
365 (len > (1LL << (64 - DEV_BSHIFT))))
366 return (-1);
367
368 switch (len_type[0]) {
369 case 'B':
370 case 'b':
371 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE));
372 break;
373 case 'K':
374 case 'k':
375 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE));
376 break;
377 case 'M':
378 case 'm':
379 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE));
380 break;
381 case 'g':
382 case 'G':
383 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE));
384 break;
385 case 't':
386 case 'T':
387 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL,
388 DEV_BSIZE));
389 break;
390 case 'p':
391 case 'P':
392 len = lbtodb(roundup(
393 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
394 DEV_BSIZE));
395 break;
396 case 'e':
397 case 'E':
398 len = lbtodb(roundup(
399 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
400 DEV_BSIZE));
401 break;
402 default:
403 /* error */
404 return (-1);
405 }
406
407 *szp = len;
408 return (0);
409 }
410
411 /*
412 * FUNCTION: meta_sp_setgeom()
413 * INPUT: np - the underlying device to setup geometry for
414 * compnp - the underlying device to setup geometry for
415 * mp - the unit structure to set the geometry for
416 * OUTPUT: ep - return error pointer
417 * RETURNS: int - -1 if error, 0 otherwise
418 * PURPOSE: establishes geometry information for a device
419 */
420 static int
meta_sp_setgeom(mdname_t * np,mdname_t * compnp,mp_unit_t * mp,md_error_t * ep)421 meta_sp_setgeom(
422 mdname_t *np,
423 mdname_t *compnp,
424 mp_unit_t *mp,
425 md_error_t *ep
426 )
427 {
428 mdgeom_t *geomp;
429 uint_t round_cyl = 0;
430
431 if ((geomp = metagetgeom(compnp, ep)) == NULL)
432 return (-1);
433 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct,
434 geomp->read_reinstruct, round_cyl, ep) != 0)
435 return (-1);
436
437 return (0);
438 }
439
440 /*
441 * FUNCTION: meta_sp_setstatus()
442 * INPUT: sp - the set name for the devices to set the status on
443 * minors - an array of minor numbers of devices to set status on
444 * num_units - number of entries in the array
445 * status - status value to set all units to
446 * OUTPUT: ep - return error pointer
447 * RETURNS: int - -1 if error, 0 success
448 * PURPOSE: sets the status of one or more soft partitions to the
449 * requested value
450 */
451 int
meta_sp_setstatus(mdsetname_t * sp,minor_t * minors,int num_units,sp_status_t status,md_error_t * ep)452 meta_sp_setstatus(
453 mdsetname_t *sp,
454 minor_t *minors,
455 int num_units,
456 sp_status_t status,
457 md_error_t *ep
458 )
459 {
460 md_sp_statusset_t status_params;
461
462 assert(minors != NULL);
463
464 /* update status of all soft partitions to the status passed in */
465 (void) memset(&status_params, 0, sizeof (status_params));
466 status_params.num_units = num_units;
467 status_params.new_status = status;
468 status_params.size = num_units * sizeof (minor_t);
469 status_params.minors = (uintptr_t)minors;
470 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno);
471 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde,
472 NULL) != 0) {
473 (void) mdstealerror(ep, &status_params.mde);
474 return (-1);
475 }
476 return (0);
477 }
478
479 /*
480 * FUNCTION: meta_get_sp_names()
481 * INPUT: sp - the set name to get soft partitions from
482 * options - options from the command line
483 * OUTPUT: nlpp - list of all soft partition names
484 * ep - return error pointer
485 * RETURNS: int - -1 if error, 0 success
486 * PURPOSE: returns a list of all soft partitions in the metadb
487 * for all devices in the specified set
488 */
489 int
meta_get_sp_names(mdsetname_t * sp,mdnamelist_t ** nlpp,int options,md_error_t * ep)490 meta_get_sp_names(
491 mdsetname_t *sp,
492 mdnamelist_t **nlpp,
493 int options,
494 md_error_t *ep
495 )
496 {
497 return (meta_get_names(MD_SP, sp, nlpp, options, ep));
498 }
499
500 /*
501 * FUNCTION: meta_get_by_component()
502 * INPUT: sp - the set name to get soft partitions from
503 * compnp - the name of the device containing the soft
504 * partitions that will be returned
505 * force - 0 - reads cached namelist if available,
506 * 1 - reloads cached namelist, frees old namelist
507 * OUTPUT: nlpp - list of all soft partition names
508 * ep - return error pointer
509 * RETURNS: int - -1 error, otherwise the number of soft partitions
510 * found on the component (0 = none found).
511 * PURPOSE: returns a list of all soft partitions on a given device
512 * from the metadb information
513 */
514 static int
meta_sp_get_by_component(mdsetname_t * sp,mdname_t * compnp,mdnamelist_t ** nlpp,int force,md_error_t * ep)515 meta_sp_get_by_component(
516 mdsetname_t *sp,
517 mdname_t *compnp,
518 mdnamelist_t **nlpp,
519 int force,
520 md_error_t *ep
521 )
522 {
523 static mdnamelist_t *cached_list = NULL; /* cached namelist */
524 static int cached_count = 0; /* cached count */
525 mdnamelist_t *spnlp = NULL; /* all sp names */
526 mdnamelist_t *namep; /* list iterator */
527 mdnamelist_t **tailpp = nlpp; /* namelist tail */
528 mdnamelist_t **cachetailpp; /* cache tail */
529 md_sp_t *msp; /* unit structure */
530 int count = 0; /* count of sp's */
531 int err;
532 mdname_t *curnp;
533
534 if ((cached_list != NULL) && (!force)) {
535 /* return a copy of the cached list */
536 for (namep = cached_list; namep != NULL; namep = namep->next)
537 tailpp = meta_namelist_append_wrapper(tailpp,
538 namep->namep);
539 return (cached_count);
540 }
541
542 /* free the cache and reset values to zeros to prepare for a new list */
543 metafreenamelist(cached_list);
544 cached_count = 0;
545 cached_list = NULL;
546 cachetailpp = &cached_list;
547 *nlpp = NULL;
548
549 /* get all the softpartitions first of all */
550 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
551 return (-1);
552
553 /*
554 * Now for each sp, see if it resides on the component we
555 * are interested in, if so then add it to our list
556 */
557 for (namep = spnlp; namep != NULL; namep = namep->next) {
558 curnp = namep->namep;
559
560 /* get the unit structure */
561 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
562 continue;
563
564 /*
565 * If the current soft partition is not on the same
566 * component, continue the search. If it is on the same
567 * component, add it to our namelist.
568 */
569 err = meta_check_samedrive(compnp, msp->compnamep, ep);
570 if (err <= 0) {
571 /* not on the same device, check the next one */
572 continue;
573 }
574
575 /* it's on the same drive */
576
577 /*
578 * Check for overlapping partitions if the component is not
579 * a metadevice.
580 */
581 if (!metaismeta(msp->compnamep)) {
582 /*
583 * if they're on the same drive, neither
584 * should be a metadevice if one isn't
585 */
586 assert(!metaismeta(compnp));
587
588 if (meta_check_overlap(msp->compnamep->cname,
589 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0)
590 continue;
591
592 /* in this case it's not an error for them to overlap */
593 mdclrerror(ep);
594 }
595
596 /* Component is on the same device, add to the used list */
597 tailpp = meta_namelist_append_wrapper(tailpp, curnp);
598 cachetailpp = meta_namelist_append_wrapper(cachetailpp,
599 curnp);
600
601 ++count;
602 ++cached_count;
603 }
604
605 assert(count == cached_count);
606 return (count);
607
608 out:
609 metafreenamelist(*nlpp);
610 *nlpp = NULL;
611 return (-1);
612 }
613
614 /*
615 * FUNCTION: meta_sp_get_default_alignment()
616 * INPUT: sp - the pertinent set name
617 * compnp - the name of the underlying component
618 * OUTPUT: ep - return error pointer
619 * RETURNS: sp_ext_length_t =0: no default alignment
620 * >0: default alignment
621 * PURPOSE: returns the default alignment for soft partitions to
622 * be built on top of the specified component or
623 * metadevice
624 */
625 static sp_ext_length_t
meta_sp_get_default_alignment(mdsetname_t * sp,mdname_t * compnp,md_error_t * ep)626 meta_sp_get_default_alignment(
627 mdsetname_t *sp,
628 mdname_t *compnp,
629 md_error_t *ep
630 )
631 {
632 sp_ext_length_t a = SP_UNALIGNED;
633 char *mname;
634
635 assert(compnp != NULL);
636
637 /*
638 * We treat raw devices as opaque, and assume nothing about
639 * their alignment requirements.
640 */
641 if (!metaismeta(compnp))
642 return (SP_UNALIGNED);
643
644 /*
645 * We already know it's a metadevice from the previous test;
646 * metagetmiscname() will tell us which metadevice type we
647 * have
648 */
649 mname = metagetmiscname(compnp, ep);
650 if (mname == NULL)
651 goto out;
652
653 /*
654 * For a mirror, we want to deal with the stripe that is the
655 * primary side. If it happens to be asymmetrically
656 * configured, there is no simple way to fake a universal
657 * alignment. There's a chance that the least common
658 * denominator of the set of interlaces from all stripes of
659 * all submirrors would do it, but nobody that really cared
660 * that much about this issue would create an asymmetric
661 * config to start with.
662 *
663 * If the component underlying the soft partition is a mirror,
664 * then at the exit of this loop, compnp will have been
665 * updated to describe the first active submirror.
666 */
667 if (strcmp(mname, MD_MIRROR) == 0) {
668 md_mirror_t *mp;
669 int smi;
670 md_submirror_t *smp;
671
672 mp = meta_get_mirror(sp, compnp, ep);
673 if (mp == NULL)
674 goto out;
675
676 for (smi = 0; smi < NMIRROR; smi++) {
677
678 smp = &mp->submirrors[smi];
679 if (smp->state == SMS_UNUSED)
680 continue;
681
682 compnp = smp->submirnamep;
683 assert(compnp != NULL);
684
685 mname = metagetmiscname(compnp, ep);
686 if (mname == NULL)
687 goto out;
688
689 break;
690 }
691
692 if (smi == NMIRROR)
693 goto out;
694 }
695
696 /*
697 * Handle stripes and submirrors identically; just return the
698 * interlace of the first row.
699 */
700 if (strcmp(mname, MD_STRIPE) == 0) {
701 md_stripe_t *stp;
702
703 stp = meta_get_stripe(sp, compnp, ep);
704 if (stp == NULL)
705 goto out;
706
707 a = stp->rows.rows_val[0].interlace;
708 goto out;
709 }
710
711 /*
712 * Raid is even more straightforward; the interlace applies to
713 * the entire device.
714 */
715 if (strcmp(mname, MD_RAID) == 0) {
716 md_raid_t *rp;
717
718 rp = meta_get_raid(sp, compnp, ep);
719 if (rp == NULL)
720 goto out;
721
722 a = rp->interlace;
723 goto out;
724 }
725
726 /*
727 * If we have arrived here with the alignment still not set,
728 * then we expect the error to have been set by one of the
729 * routines we called. If neither is the case, something has
730 * really gone wrong above. (Probably the submirror walk
731 * failed to produce a valid submirror, but that would be
732 * really bad...)
733 */
734 out:
735 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, "
736 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a);
737
738 if (getenv(META_SP_DEBUG) && !mdisok(ep)) {
739 mde_perror(ep, NULL);
740 }
741
742 assert((a > 0) || (!mdisok(ep)));
743
744 return (a);
745 }
746
747
748
749 /*
750 * FUNCTION: meta_check_insp()
751 * INPUT: sp - the set name for the device to check
752 * np - the name of the device to check
753 * slblk - the starting offset of the device to check
754 * nblks - the number of blocks in the device to check
755 * OUTPUT: ep - return error pointer
756 * RETURNS: int - 0 - device contains soft partitions
757 * -1 - device does not contain soft partitions
758 * PURPOSE: determines whether a device contains any soft partitions
759 */
760 /* ARGSUSED */
761 int
meta_check_insp(mdsetname_t * sp,mdname_t * np,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)762 meta_check_insp(
763 mdsetname_t *sp,
764 mdname_t *np,
765 diskaddr_t slblk,
766 diskaddr_t nblks,
767 md_error_t *ep
768 )
769 {
770 mdnamelist_t *spnlp = NULL; /* soft partition name list */
771 int count;
772 int rval;
773
774 /* check set pointer */
775 assert(sp != NULL);
776
777 /*
778 * Get a list of the soft partitions that currently reside on
779 * the component. We should ALWAYS force reload the cache,
780 * because if we're using the md.tab, we must rebuild
781 * the list because it won't contain the previous (if any)
782 * soft partition.
783 */
784 /* find all soft partitions on the component */
785 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep);
786
787 if (count == -1) {
788 rval = -1;
789 } else if (count > 0) {
790 rval = mduseerror(ep, MDE_ALREADY, np->dev,
791 spnlp->namep->cname, np->cname);
792 } else {
793 rval = 0;
794 }
795
796 metafreenamelist(spnlp);
797 return (rval);
798 }
799
800 /*
801 * **************************************************************************
802 * Extent List Manipulation Functions *
803 * **************************************************************************
804 */
805
806 /*
807 * FUNCTION: meta_sp_cmp_by_nameseq()
808 * INPUT: e1 - first node to compare
809 * e2 - second node to compare
810 * OUTPUT: none
811 * RETURNS: int - =0 - nodes are equal
812 * <0 - e1 should go before e2
813 * >0 - e1 should go after e2
814 * PURPOSE: used for sorted list inserts to build a list sorted by
815 * name first and sequence number second.
816 */
817 static int
meta_sp_cmp_by_nameseq(sp_ext_node_t * e1,sp_ext_node_t * e2)818 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2)
819 {
820 int rval;
821
822 if (e1->ext_namep == NULL)
823 return (1);
824 if (e2->ext_namep == NULL)
825 return (-1);
826 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0)
827 return (rval);
828
829 /* the names are equal, compare sequence numbers */
830 if (e1->ext_seq > e2->ext_seq)
831 return (1);
832 if (e1->ext_seq < e2->ext_seq)
833 return (-1);
834 /* sequence numbers are also equal */
835 return (0);
836 }
837
838 /*
839 * FUNCTION: meta_sp_cmp_by_offset()
840 * INPUT: e1 - first node to compare
841 * e2 - second node to compare
842 * OUTPUT: none
843 * RETURNS: int - =0 - nodes are equal
844 * <0 - e1 should go before e2
845 * >0 - e1 should go after e2
846 * PURPOSE: used for sorted list inserts to build a list sorted by offset
847 */
848 static int
meta_sp_cmp_by_offset(sp_ext_node_t * e1,sp_ext_node_t * e2)849 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2)
850 {
851 if (e1->ext_offset > e2->ext_offset)
852 return (1);
853 if (e1->ext_offset < e2->ext_offset)
854 return (-1);
855 /* offsets are equal */
856 return (0);
857 }
858
859 /*
860 * FUNCTION: meta_sp_list_insert()
861 * INPUT: sp - the set name for the device the node belongs to
862 * np - the name of the device the node belongs to
863 * head - the head of the list, must be NULL for empty list
864 * offset - the physical offset of this extent in sectors
865 * length - the length of this extent in sectors
866 * type - the type of the extent being inserted
867 * seq - the sequence number of the extent being inserted
868 * flags - extent flags (eg. whether it needs to be updated)
869 * compare - the compare function to use
870 * OUTPUT: head - points to the new head if a node was inserted
871 * at the beginning
872 * RETURNS: void
873 * PURPOSE: inserts an extent node into a sorted doubly linked list.
874 * The sort order is determined by the compare function.
875 * Memory is allocated for the node in this function and it
876 * is up to the caller to free it, possibly using
877 * meta_sp_list_free(). If a node is inserted at the
878 * beginning of the list, the head pointer is updated to
879 * point to the new first node.
880 */
881 static void
meta_sp_list_insert(mdsetname_t * sp,mdname_t * np,sp_ext_node_t ** head,sp_ext_offset_t offset,sp_ext_length_t length,sp_ext_type_t type,uint_t seq,uint_t flags,ext_cmpfunc_t compare)882 meta_sp_list_insert(
883 mdsetname_t *sp,
884 mdname_t *np,
885 sp_ext_node_t **head,
886 sp_ext_offset_t offset,
887 sp_ext_length_t length,
888 sp_ext_type_t type,
889 uint_t seq,
890 uint_t flags,
891 ext_cmpfunc_t compare
892 )
893 {
894 sp_ext_node_t *newext;
895 sp_ext_node_t *curext;
896
897 assert(head != NULL);
898
899 /* Don't bother adding zero length nodes */
900 if (length == 0ULL)
901 return;
902
903 /* allocate and fill in new ext_node */
904 newext = Zalloc(sizeof (sp_ext_node_t));
905
906 newext->ext_offset = offset;
907 newext->ext_length = length;
908 newext->ext_flags = flags;
909 newext->ext_type = type;
910 newext->ext_seq = seq;
911 newext->ext_setp = sp;
912 newext->ext_namep = np;
913
914 /* first node in the list */
915 if (*head == NULL) {
916 newext->ext_next = newext->ext_prev = NULL;
917 *head = newext;
918 } else if ((*compare)(*head, newext) >= 0) {
919 /* the first node has a bigger offset, so insert before it */
920 assert((*head)->ext_prev == NULL);
921
922 newext->ext_prev = NULL;
923 newext->ext_next = *head;
924 (*head)->ext_prev = newext;
925 *head = newext;
926 } else {
927 /*
928 * find the next node whose offset is greater than
929 * the one we want to insert, or the end of the list.
930 */
931 for (curext = *head;
932 (curext->ext_next != NULL) &&
933 ((*compare)(curext->ext_next, newext) < 0);
934 (curext = curext->ext_next))
935 ;
936
937 /* link the new node in after the current node */
938 newext->ext_next = curext->ext_next;
939 newext->ext_prev = curext;
940
941 if (curext->ext_next != NULL)
942 curext->ext_next->ext_prev = newext;
943
944 curext->ext_next = newext;
945 }
946 }
947
948 /*
949 * FUNCTION: meta_sp_list_free()
950 * INPUT: head - the head of the list, must be NULL for empty list
951 * OUTPUT: head - points to NULL on return
952 * RETURNS: void
953 * PURPOSE: walks a double linked extent list and frees each node
954 */
955 static void
meta_sp_list_free(sp_ext_node_t ** head)956 meta_sp_list_free(sp_ext_node_t **head)
957 {
958 sp_ext_node_t *ext;
959 sp_ext_node_t *next;
960
961 assert(head != NULL);
962
963 ext = *head;
964 while (ext) {
965 next = ext->ext_next;
966 Free(ext);
967 ext = next;
968 }
969 *head = NULL;
970 }
971
972 /*
973 * FUNCTION: meta_sp_list_remove()
974 * INPUT: head - the head of the list, must be NULL for empty list
975 * ext - the extent to remove, must be a member of the list
976 * OUTPUT: head - points to the new head of the list
977 * RETURNS: void
978 * PURPOSE: unlinks the node specified by ext from the list and
979 * frees it, possibly moving the head pointer forward if
980 * the head is the node being removed.
981 */
982 static void
meta_sp_list_remove(sp_ext_node_t ** head,sp_ext_node_t * ext)983 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext)
984 {
985 assert(head != NULL);
986 assert(*head != NULL);
987
988 if (*head == ext)
989 *head = ext->ext_next;
990
991 if (ext->ext_prev != NULL)
992 ext->ext_prev->ext_next = ext->ext_next;
993 if (ext->ext_next != NULL)
994 ext->ext_next->ext_prev = ext->ext_prev;
995 Free(ext);
996 }
997
998 /*
999 * FUNCTION: meta_sp_list_size()
1000 * INPUT: head - the head of the list, must be NULL for empty list
1001 * exttype - the type of the extents to sum
1002 * exclude_wm - subtract space for extent headers from total
1003 * OUTPUT: none
1004 * RETURNS: sp_ext_length_t - the sum of all of the lengths
1005 * PURPOSE: sums the lengths of all extents in the list matching the
1006 * specified type. This could be used for computing the
1007 * amount of free or used space, for example.
1008 */
1009 static sp_ext_length_t
meta_sp_list_size(sp_ext_node_t * head,sp_ext_type_t exttype,int exclude_wm)1010 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm)
1011 {
1012 sp_ext_node_t *ext;
1013 sp_ext_length_t size = 0LL;
1014
1015 for (ext = head; ext != NULL; ext = ext->ext_next)
1016 if (ext->ext_type == exttype)
1017 size += ext->ext_length -
1018 ((exclude_wm) ? MD_SP_WMSIZE : 0);
1019
1020 return (size);
1021 }
1022
1023 /*
1024 * FUNCTION: meta_sp_list_find()
1025 * INPUT: head - the head of the list, must be NULL for empty list
1026 * offset - the offset contained by the node to find
1027 * OUTPUT: none
1028 * RETURNS: sp_ext_node_t * - the node containing the requested offset
1029 * or NULL if no such nodes were found.
1030 * PURPOSE: finds a node in a list containing the requested offset
1031 * (inclusive). If multiple nodes contain this offset then
1032 * only the first will be returned, though typically these
1033 * lists are managed with non-overlapping nodes.
1034 *
1035 * *The list MUST be sorted by offset for this function to work.*
1036 */
1037 static sp_ext_node_t *
meta_sp_list_find(sp_ext_node_t * head,sp_ext_offset_t offset)1038 meta_sp_list_find(
1039 sp_ext_node_t *head,
1040 sp_ext_offset_t offset
1041 )
1042 {
1043 sp_ext_node_t *ext;
1044
1045 for (ext = head; ext != NULL; ext = ext->ext_next) {
1046 /* check if the offset lies within this extent */
1047 if ((offset >= ext->ext_offset) &&
1048 (offset < ext->ext_offset + ext->ext_length)) {
1049 /*
1050 * the requested extent should always be a
1051 * subset of an extent in the list.
1052 */
1053 return (ext);
1054 }
1055 }
1056 return (NULL);
1057 }
1058
1059 /*
1060 * FUNCTION: meta_sp_list_freefill()
1061 * INPUT: head - the head of the list, must be NULL for empty list
1062 * size - the size of the volume this extent list is
1063 * representing
1064 * OUTPUT: head - the new head of the list
1065 * RETURNS: void
1066 * PURPOSE: finds gaps in the extent list and fills them with a free
1067 * node. If there is a gap at the beginning the head
1068 * pointer will be changed to point to the new free node.
1069 * If there is free space at the end, the last free extent
1070 * will extend all the way out to the size specified.
1071 *
1072 * *The list MUST be sorted by offset for this function to work.*
1073 */
1074 static void
meta_sp_list_freefill(sp_ext_node_t ** head,sp_ext_length_t size)1075 meta_sp_list_freefill(
1076 sp_ext_node_t **head,
1077 sp_ext_length_t size
1078 )
1079 {
1080 sp_ext_node_t *ext;
1081 sp_ext_offset_t curoff = 0LL;
1082
1083 for (ext = *head; ext != NULL; ext = ext->ext_next) {
1084 if (curoff < ext->ext_offset)
1085 meta_sp_list_insert(NULL, NULL, head,
1086 curoff, ext->ext_offset - curoff,
1087 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
1088 curoff = ext->ext_offset + ext->ext_length;
1089 }
1090
1091 /* pad inverse list out to the end */
1092 if (curoff < size)
1093 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff,
1094 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
1095
1096 if (getenv(META_SP_DEBUG)) {
1097 meta_sp_debug("meta_sp_list_freefill: Extent list with "
1098 "holes freefilled:\n");
1099 meta_sp_list_dump(*head);
1100 }
1101 }
1102
1103 /*
1104 * FUNCTION: meta_sp_list_dump()
1105 * INPUT: head - the head of the list, must be NULL for empty list
1106 * OUTPUT: none
1107 * RETURNS: void
1108 * PURPOSE: dumps the entire extent list to stdout for easy debugging
1109 */
1110 static void
meta_sp_list_dump(sp_ext_node_t * head)1111 meta_sp_list_dump(sp_ext_node_t *head)
1112 {
1113 sp_ext_node_t *ext;
1114
1115 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n");
1116 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name",
1117 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev",
1118 "Next");
1119 for (ext = head; ext != NULL; ext = ext->ext_next) {
1120 if (ext->ext_namep != NULL)
1121 meta_sp_debug("%5s", ext->ext_namep->cname);
1122 else
1123 meta_sp_debug("%5s", "NONE");
1124
1125 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq);
1126 switch (ext->ext_type) {
1127 case EXTTYP_ALLOC:
1128 meta_sp_debug("%7s ", "ALLOC");
1129 break;
1130 case EXTTYP_FREE:
1131 meta_sp_debug("%7s ", "FREE");
1132 break;
1133 case EXTTYP_END:
1134 meta_sp_debug("%7s ", "END");
1135 break;
1136 case EXTTYP_RESERVED:
1137 meta_sp_debug("%7s ", "RESV");
1138 break;
1139 default:
1140 meta_sp_debug("%7s ", "INVLD");
1141 break;
1142 }
1143
1144 meta_sp_debug("%10llu %10llu %5u %10p %10p\n",
1145 ext->ext_offset, ext->ext_length,
1146 ext->ext_flags, (void *) ext->ext_prev,
1147 (void *) ext->ext_next);
1148 }
1149 meta_sp_debug("\n");
1150 }
1151
1152 /*
1153 * FUNCTION: meta_sp_list_overlaps()
1154 * INPUT: head - the head of the list, must be NULL for empty list
1155 * OUTPUT: none
1156 * RETURNS: int - 1 if extents overlap, 0 if ok
1157 * PURPOSE: checks a list for overlaps. The list MUST be sorted by
1158 * offset for this function to work properly.
1159 */
1160 static int
meta_sp_list_overlaps(sp_ext_node_t * head)1161 meta_sp_list_overlaps(sp_ext_node_t *head)
1162 {
1163 sp_ext_node_t *ext;
1164
1165 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) {
1166 if (ext->ext_offset + ext->ext_length >
1167 ext->ext_next->ext_offset)
1168 return (1);
1169 }
1170 return (0);
1171 }
1172
1173 /*
1174 * **************************************************************************
1175 * Extent Allocation Functions *
1176 * **************************************************************************
1177 */
1178
1179 /*
1180 * FUNCTION: meta_sp_alloc_by_ext()
1181 * INPUT: sp - the set name for the device the node belongs to
1182 * np - the name of the device the node belongs to
1183 * head - the head of the list, must be NULL for empty list
1184 * free_ext - the free extent being allocated from
1185 * alloc_offset - the offset of the allocation
1186 * alloc_len - the length of the allocation
1187 * seq - the sequence number of the allocation
1188 * OUTPUT: head - the new head pointer
1189 * RETURNS: void
1190 * PURPOSE: allocates a portion of the free extent free_ext. The
1191 * allocated portion starts at alloc_offset and is
1192 * alloc_length long. Both (alloc_offset) and (alloc_offset +
1193 * alloc_length) must be contained within the free extent.
1194 *
1195 * The free extent is split into as many as 3 pieces - a
1196 * free extent containing [ free_offset .. alloc_offset ), an
1197 * allocated extent containing the range [ alloc_offset ..
1198 * alloc_end ], and another free extent containing the
1199 * range ( alloc_end .. free_end ]. If either of the two
1200 * new free extents would be zero length, they are not created.
1201 *
1202 * Finally, the original free extent is removed. All newly
1203 * created extents have the EXTFLG_UPDATE flag set.
1204 */
1205 static void
meta_sp_alloc_by_ext(mdsetname_t * sp,mdname_t * np,sp_ext_node_t ** head,sp_ext_node_t * free_ext,sp_ext_offset_t alloc_offset,sp_ext_length_t alloc_length,uint_t seq)1206 meta_sp_alloc_by_ext(
1207 mdsetname_t *sp,
1208 mdname_t *np,
1209 sp_ext_node_t **head,
1210 sp_ext_node_t *free_ext,
1211 sp_ext_offset_t alloc_offset,
1212 sp_ext_length_t alloc_length,
1213 uint_t seq
1214 )
1215 {
1216 sp_ext_offset_t free_offset = free_ext->ext_offset;
1217 sp_ext_length_t free_length = free_ext->ext_length;
1218
1219 sp_ext_offset_t alloc_end = alloc_offset + alloc_length;
1220 sp_ext_offset_t free_end = free_offset + free_length;
1221
1222 /* allocated extent must be a subset of the free extent */
1223 assert(free_offset <= alloc_offset);
1224 assert(free_end >= alloc_end);
1225
1226 meta_sp_list_remove(head, free_ext);
1227
1228 if (free_offset < alloc_offset) {
1229 meta_sp_list_insert(NULL, NULL, head, free_offset,
1230 (alloc_offset - free_offset), EXTTYP_FREE, 0,
1231 EXTFLG_UPDATE, meta_sp_cmp_by_offset);
1232 }
1233
1234 if (free_end > alloc_end) {
1235 meta_sp_list_insert(NULL, NULL, head, alloc_end,
1236 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE,
1237 meta_sp_cmp_by_offset);
1238 }
1239
1240 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length,
1241 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
1242
1243 if (getenv(META_SP_DEBUG)) {
1244 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n");
1245 meta_sp_list_dump(*head);
1246 }
1247 }
1248
1249 /*
1250 * FUNCTION: meta_sp_alloc_by_len()
1251 * INPUT: sp - the set name for the device the node belongs to
1252 * np - the name of the device the node belongs to
1253 * head - the head of the list, must be NULL for empty list
1254 * *lp - the requested length to allocate
1255 * last_off - the last offset already allocated.
1256 * alignment - the desired extent alignmeent
1257 * OUTPUT: head - the new head pointer
1258 * *lp - the length allocated
1259 * RETURNS: int - -1 if error, the number of new extents on success
1260 * PURPOSE: allocates extents from free space to satisfy the requested
1261 * length. If requested length is zero, allocates all
1262 * remaining free space. This function provides the meat
1263 * of the extent allocation algorithm. Allocation is a
1264 * three tier process:
1265 *
1266 * 1. If last_off is nonzero and there is free space following
1267 * that node, then it is extended to allocate as much of that
1268 * free space as possible. This is useful for metattach.
1269 * 2. If a free extent can be found to satisfy the remaining
1270 * requested space, then satisfy the rest of the request
1271 * from that extent.
1272 * 3. Start allocating space from any remaining free extents until
1273 * the remainder of the request is satisified.
1274 *
1275 * If alignment is non-zero, then every extent modified
1276 * or newly allocated will be aligned modulo alignment,
1277 * with a length that is an integer multiple of
1278 * alignment.
1279 *
1280 * The EXTFLG_UPDATE flag is set for all nodes (free and
1281 * allocated) that require updated watermarks.
1282 *
1283 * This algorithm may have a negative impact on fragmentation
1284 * in pathological cases and may be improved if it turns out
1285 * to be a problem. This may be exacerbated by particularly
1286 * large alignments.
1287 *
1288 * NOTE: It's confusing, so it demands an explanation:
1289 * - len is used to represent requested data space; it
1290 * does not include room for a watermark. On each full
1291 * or partial allocation, len will be decremented by
1292 * alloc_len (see next paragraph) until it reaches
1293 * zero.
1294 * - alloc_len is used to represent data space allocated
1295 * from a particular extent; it does not include space
1296 * for a watermark. In the rare event that a_length
1297 * (see next paragraph) is equal to MD_SP_WMSIZE,
1298 * alloc_len will be zero and the resulting MD_SP_WMSIZE
1299 * fragment of space will be utterly unusable.
1300 * - a_length is used to represent all space to be
1301 * allocated from a particular extent; it DOES include
1302 * space for a watermark.
1303 */
1304 static int
meta_sp_alloc_by_len(mdsetname_t * sp,mdname_t * np,sp_ext_node_t ** head,sp_ext_length_t * lp,sp_ext_offset_t last_off,sp_ext_offset_t alignment)1305 meta_sp_alloc_by_len(
1306 mdsetname_t *sp,
1307 mdname_t *np,
1308 sp_ext_node_t **head,
1309 sp_ext_length_t *lp,
1310 sp_ext_offset_t last_off,
1311 sp_ext_offset_t alignment
1312 )
1313 {
1314 sp_ext_node_t *free_ext;
1315 sp_ext_node_t *alloc_ext;
1316 uint_t last_seq = 0;
1317 uint_t numexts = 0;
1318 sp_ext_length_t freespace;
1319 sp_ext_length_t alloc_len;
1320 sp_ext_length_t len;
1321
1322 /* We're DOA if we can't read *lp */
1323 assert(lp != NULL);
1324 len = *lp;
1325
1326 /*
1327 * Process the nominal case first: we've been given an actual
1328 * size argument, rather than the literal "all"
1329 */
1330
1331 if (len != 0) {
1332
1333 /*
1334 * Short circuit the check for free space. This may
1335 * tell us we have enough space when we really don't
1336 * because each extent loses space to a watermark, but
1337 * it will always tell us there isn't enough space
1338 * correctly. Worst case we do some extra work.
1339 */
1340 freespace = meta_sp_list_size(*head, EXTTYP_FREE,
1341 INCLUDE_WM);
1342
1343 if (freespace < len)
1344 return (-1);
1345
1346 /*
1347 * First see if we can extend the last extent for an
1348 * attach.
1349 */
1350 if (last_off != 0LL) {
1351 int align = 0;
1352
1353 alloc_ext =
1354 meta_sp_list_find(*head, last_off);
1355 assert(alloc_ext != NULL);
1356
1357 /*
1358 * The offset test reflects the
1359 * inclusion of the watermark in the extent
1360 */
1361 align = (alignment > 0) &&
1362 (((alloc_ext->ext_offset + MD_SP_WMSIZE) %
1363 alignment) == 0);
1364
1365 /*
1366 * If we decided not to align here, we should
1367 * also reset "alignment" so we don't bother
1368 * later, either.
1369 */
1370 if (!align) {
1371 alignment = 0;
1372 }
1373
1374 last_seq = alloc_ext->ext_seq;
1375
1376 free_ext = meta_sp_list_find(*head,
1377 alloc_ext->ext_offset +
1378 alloc_ext->ext_length);
1379
1380 /*
1381 * If a free extent follows our last allocated
1382 * extent, then remove the last allocated
1383 * extent and increase the size of the free
1384 * extent to overlap it, then allocate the
1385 * total space from the new free extent.
1386 */
1387 if (free_ext != NULL &&
1388 free_ext->ext_type == EXTTYP_FREE) {
1389 assert(free_ext->ext_offset ==
1390 alloc_ext->ext_offset +
1391 alloc_ext->ext_length);
1392
1393 alloc_len =
1394 MIN(len, free_ext->ext_length);
1395
1396 if (align && (alloc_len < len)) {
1397 /* No watermark space needed */
1398 alloc_len -= alloc_len % alignment;
1399 }
1400
1401 if (alloc_len > 0) {
1402 free_ext->ext_offset -=
1403 alloc_ext->ext_length;
1404 free_ext->ext_length +=
1405 alloc_ext->ext_length;
1406
1407 meta_sp_alloc_by_ext(sp, np, head,
1408 free_ext, free_ext->ext_offset,
1409 alloc_ext->ext_length + alloc_len,
1410 last_seq);
1411
1412 /*
1413 * now remove the original allocated
1414 * node. We may have overlapping
1415 * extents for a short time before
1416 * this node is removed.
1417 */
1418 meta_sp_list_remove(head, alloc_ext);
1419 len -= alloc_len;
1420 }
1421 }
1422 last_seq++;
1423 }
1424
1425 if (len == 0LL)
1426 goto out;
1427
1428 /*
1429 * Next, see if we can find a single allocation for
1430 * the remainder. This may make fragmentation worse
1431 * in some cases, but there's no good way to allocate
1432 * that doesn't have a highly fragmented corner case.
1433 */
1434 for (free_ext = *head; free_ext != NULL;
1435 free_ext = free_ext->ext_next) {
1436 sp_ext_offset_t a_offset;
1437 sp_ext_offset_t a_length;
1438
1439 if (free_ext->ext_type != EXTTYP_FREE)
1440 continue;
1441
1442 /*
1443 * The length test should include space for
1444 * the watermark
1445 */
1446
1447 a_offset = free_ext->ext_offset;
1448 a_length = free_ext->ext_length;
1449
1450 if (alignment > 0) {
1451
1452 /*
1453 * Shortcut for extents that have been
1454 * previously added to pad out the
1455 * data space
1456 */
1457 if (a_length < alignment) {
1458 continue;
1459 }
1460
1461 /*
1462 * Round up so the data space begins
1463 * on a properly aligned boundary.
1464 */
1465 a_offset += alignment -
1466 (a_offset % alignment) - MD_SP_WMSIZE;
1467
1468 /*
1469 * This is only necessary in case the
1470 * watermark size is ever greater than
1471 * one. It'll never happen, of
1472 * course; we'll get rid of watermarks
1473 * before we make 'em bigger.
1474 */
1475 if (a_offset < free_ext->ext_offset) {
1476 a_offset += alignment;
1477 }
1478
1479 /*
1480 * Adjust the length to account for
1481 * the space lost above (if any)
1482 */
1483 a_length -=
1484 (a_offset - free_ext->ext_offset);
1485 }
1486
1487 if (a_length >= len + MD_SP_WMSIZE) {
1488 meta_sp_alloc_by_ext(sp, np, head,
1489 free_ext, a_offset,
1490 len + MD_SP_WMSIZE, last_seq);
1491
1492 len = 0LL;
1493 numexts++;
1494 break;
1495 }
1496 }
1497
1498 if (len == 0LL)
1499 goto out;
1500
1501
1502 /*
1503 * If the request could not be satisfied by extending
1504 * the last extent or by a single extent, then put
1505 * multiple smaller extents together until the request
1506 * is satisfied.
1507 */
1508 for (free_ext = *head; (free_ext != NULL) && (len > 0);
1509 free_ext = free_ext->ext_next) {
1510 sp_ext_offset_t a_offset;
1511 sp_ext_length_t a_length;
1512
1513 if (free_ext->ext_type != EXTTYP_FREE)
1514 continue;
1515
1516 a_offset = free_ext->ext_offset;
1517 a_length = free_ext->ext_length;
1518
1519 if (alignment > 0) {
1520
1521 /*
1522 * Shortcut for extents that have been
1523 * previously added to pad out the
1524 * data space
1525 */
1526 if (a_length < alignment) {
1527 continue;
1528 }
1529
1530 /*
1531 * Round up so the data space begins
1532 * on a properly aligned boundary.
1533 */
1534 a_offset += alignment -
1535 (a_offset % alignment) - MD_SP_WMSIZE;
1536
1537 /*
1538 * This is only necessary in case the
1539 * watermark size is ever greater than
1540 * one. It'll never happen, of
1541 * course; we'll get rid of watermarks
1542 * before we make 'em bigger.
1543 */
1544 if (a_offset < free_ext->ext_offset) {
1545 a_offset += alignment;
1546 }
1547
1548 /*
1549 * Adjust the length to account for
1550 * the space lost above (if any)
1551 */
1552 a_length -=
1553 (a_offset - free_ext->ext_offset);
1554
1555 /*
1556 * Adjust the length to be properly
1557 * aligned if it is NOT to be the
1558 * last extent in the soft partition.
1559 */
1560 if ((a_length - MD_SP_WMSIZE) < len)
1561 a_length -=
1562 (a_length - MD_SP_WMSIZE)
1563 % alignment;
1564 }
1565
1566 alloc_len = MIN(len, a_length - MD_SP_WMSIZE);
1567 if (alloc_len == 0)
1568 continue;
1569
1570 /*
1571 * meta_sp_alloc_by_ext() expects the
1572 * allocation length to include the watermark
1573 * size, which is why we don't simply pass in
1574 * alloc_len here.
1575 */
1576 meta_sp_alloc_by_ext(sp, np, head, free_ext,
1577 a_offset, MIN(len + MD_SP_WMSIZE, a_length),
1578 last_seq);
1579
1580 len -= alloc_len;
1581 numexts++;
1582 last_seq++;
1583 }
1584
1585
1586 /*
1587 * If there was not enough space we can throw it all
1588 * away since no real work has been done yet.
1589 */
1590 if (len != 0) {
1591 meta_sp_list_free(head);
1592 return (-1);
1593 }
1594 }
1595
1596 /*
1597 * Otherwise, the literal "all" was specified: allocate all
1598 * available free space. Don't bother with alignment.
1599 */
1600 else {
1601 /* First, extend the last extent if this is a grow */
1602 if (last_off != 0LL) {
1603 alloc_ext =
1604 meta_sp_list_find(*head, last_off);
1605 assert(alloc_ext != NULL);
1606
1607 last_seq = alloc_ext->ext_seq;
1608
1609 free_ext = meta_sp_list_find(*head,
1610 alloc_ext->ext_offset +
1611 alloc_ext->ext_length);
1612
1613 /*
1614 * If a free extent follows our last allocated
1615 * extent, then remove the last allocated
1616 * extent and increase the size of the free
1617 * extent to overlap it, then allocate the
1618 * total space from the new free extent.
1619 */
1620 if (free_ext != NULL &&
1621 free_ext->ext_type == EXTTYP_FREE) {
1622 assert(free_ext->ext_offset ==
1623 alloc_ext->ext_offset +
1624 alloc_ext->ext_length);
1625
1626 len = alloc_len =
1627 free_ext->ext_length;
1628
1629 free_ext->ext_offset -=
1630 alloc_ext->ext_length;
1631 free_ext->ext_length +=
1632 alloc_ext->ext_length;
1633
1634 meta_sp_alloc_by_ext(sp, np, head,
1635 free_ext, free_ext->ext_offset,
1636 alloc_ext->ext_length + alloc_len,
1637 last_seq);
1638
1639 /*
1640 * now remove the original allocated
1641 * node. We may have overlapping
1642 * extents for a short time before
1643 * this node is removed.
1644 */
1645 meta_sp_list_remove(head, alloc_ext);
1646 }
1647
1648 last_seq++;
1649 }
1650
1651 /* Next, grab all remaining free space */
1652 for (free_ext = *head; free_ext != NULL;
1653 free_ext = free_ext->ext_next) {
1654
1655 if (free_ext->ext_type == EXTTYP_FREE) {
1656 alloc_len =
1657 free_ext->ext_length - MD_SP_WMSIZE;
1658 if (alloc_len == 0)
1659 continue;
1660
1661 /*
1662 * meta_sp_alloc_by_ext() expects the
1663 * allocation length to include the
1664 * watermark size, which is why we
1665 * don't simply pass in alloc_len
1666 * here.
1667 */
1668 meta_sp_alloc_by_ext(sp, np, head,
1669 free_ext, free_ext->ext_offset,
1670 free_ext->ext_length,
1671 last_seq);
1672
1673 len += alloc_len;
1674 numexts++;
1675 last_seq++;
1676 }
1677 }
1678 }
1679
1680 out:
1681 if (getenv(META_SP_DEBUG)) {
1682 meta_sp_debug("meta_sp_alloc_by_len: Extent list after "
1683 "allocation:\n");
1684 meta_sp_list_dump(*head);
1685 }
1686
1687 if (*lp == 0) {
1688 *lp = len;
1689
1690 /*
1691 * Make sure the callers hit a no space error if we
1692 * didn't actually find anything.
1693 */
1694 if (len == 0) {
1695 return (-1);
1696 }
1697 }
1698
1699 return (numexts);
1700 }
1701
1702 /*
1703 * FUNCTION: meta_sp_alloc_by_list()
1704 * INPUT: sp - the set name for the device the node belongs to
1705 * np - the name of the device the node belongs to
1706 * head - the head of the list, must be NULL for empty list
1707 * oblist - an extent list containing requested nodes to allocate
1708 * OUTPUT: head - the new head pointer
1709 * RETURNS: int - -1 if error, the number of new extents on success
1710 * PURPOSE: allocates extents from free space to satisfy the requested
1711 * extent list. This is primarily used for the -o/-b options
1712 * where the user may specifically request extents to allocate.
1713 * Each extent in the oblist must be a subset (inclusive) of a
1714 * free extent and may not overlap each other. This
1715 * function sets the EXTFLG_UPDATE flag for each node that
1716 * requires a watermark update after allocating.
1717 */
1718 static int
meta_sp_alloc_by_list(mdsetname_t * sp,mdname_t * np,sp_ext_node_t ** head,sp_ext_node_t * oblist)1719 meta_sp_alloc_by_list(
1720 mdsetname_t *sp,
1721 mdname_t *np,
1722 sp_ext_node_t **head,
1723 sp_ext_node_t *oblist
1724 )
1725 {
1726 sp_ext_node_t *ext;
1727 sp_ext_node_t *free_ext;
1728 uint_t numexts = 0;
1729
1730 for (ext = oblist; ext != NULL; ext = ext->ext_next) {
1731
1732 free_ext = meta_sp_list_find(*head,
1733 ext->ext_offset - MD_SP_WMSIZE);
1734
1735 /* Make sure the allocation is within the free extent */
1736 if ((free_ext == NULL) ||
1737 (ext->ext_offset + ext->ext_length >
1738 free_ext->ext_offset + free_ext->ext_length) ||
1739 (free_ext->ext_type != EXTTYP_FREE))
1740 return (-1);
1741
1742 meta_sp_alloc_by_ext(sp, np, head, free_ext,
1743 ext->ext_offset - MD_SP_WMSIZE,
1744 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq);
1745
1746 numexts++;
1747 }
1748
1749 assert(meta_sp_list_overlaps(*head) == 0);
1750
1751 if (getenv(META_SP_DEBUG)) {
1752 meta_sp_debug("meta_sp_alloc_by_list: Extent list after "
1753 "allocation:\n");
1754 meta_sp_list_dump(*head);
1755 }
1756
1757 return (numexts);
1758 }
1759
1760 /*
1761 * **************************************************************************
1762 * Extent List Population Functions *
1763 * **************************************************************************
1764 */
1765
1766 /*
1767 * FUNCTION: meta_sp_extlist_from_namelist()
1768 * INPUT: sp - the set name for the device the node belongs to
1769 * spnplp - the namelist of soft partitions to build a list from
1770 * OUTPUT: extlist - the extent list built from the SPs in the namelist
1771 * ep - return error pointer
1772 * RETURNS: int - -1 if error, 0 on success
1773 * PURPOSE: builds an extent list representing the soft partitions
1774 * specified in the namelist. Each extent in each soft
1775 * partition is added to the list with the type EXTTYP_ALLOC.
1776 * The EXTFLG_UPDATE flag is not set on any nodes. Each
1777 * extent in the list includes the space occupied by the
1778 * watermark, which is not included in the unit structures.
1779 */
1780 static int
meta_sp_extlist_from_namelist(mdsetname_t * sp,mdnamelist_t * spnlp,sp_ext_node_t ** extlist,md_error_t * ep)1781 meta_sp_extlist_from_namelist(
1782 mdsetname_t *sp,
1783 mdnamelist_t *spnlp,
1784 sp_ext_node_t **extlist,
1785 md_error_t *ep
1786 )
1787 {
1788 int extn;
1789 md_sp_t *msp; /* unit structure of the sp's */
1790 mdnamelist_t *namep;
1791
1792 assert(sp != NULL);
1793
1794 /*
1795 * Now go through the soft partitions and add a node to the used
1796 * list for each allocated extent.
1797 */
1798 for (namep = spnlp; namep != NULL; namep = namep->next) {
1799 mdname_t *curnp = namep->namep;
1800
1801 /* get the unit structure */
1802 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
1803 return (-1);
1804
1805 for (extn = 0; (extn < msp->ext.ext_len); extn++) {
1806 md_sp_ext_t *extp = &msp->ext.ext_val[extn];
1807
1808 /*
1809 * subtract from offset and add to the length
1810 * to account for the watermark, which is not
1811 * contained in the extents in the unit structure.
1812 */
1813 meta_sp_list_insert(sp, curnp, extlist,
1814 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
1815 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset);
1816 }
1817 }
1818 return (0);
1819 }
1820
/*
 * FUNCTION:	meta_sp_extlist_from_wm()
 * INPUT:	sp	- the set name for the device the node belongs to
 *		compnp	- the name of the device to scan watermarks on
 *		compare	- the comparison function used to order the nodes
 *			  as they are inserted into extlist
 * OUTPUT:	extlist	- the extent list built from the watermarks read
 *		ep	- return error pointer
 * RETURNS:	int	- -1 if error, 0 on success
 * PURPOSE:	builds an extent list from the watermarks found on the
 *		component.  Starting at the offset returned by
 *		meta_sp_get_start(), watermarks are read one after the
 *		other until one of type EXTTYP_END has been processed.
 *		Each watermark becomes a node that starts at the offset
 *		of the watermark, has the type and sequence number
 *		recorded in the watermark and is wm_length + MD_SP_WMSIZE
 *		long, i.e. it includes the space occupied by the
 *		watermark itself.  The EXTFLG_UPDATE flag is set on every
 *		node.
 *
 *		For watermarks that do not carry the MD_SP_FREEWMNAME
 *		name, the metadevice name is registered first (by message
 *		to all nodes for a multi-node set, by
 *		meta_init_make_device() otherwise) and then looked up
 *		with metaname().
 */
static int
meta_sp_extlist_from_wm(
	mdsetname_t	*sp,
	mdname_t	*compnp,
	sp_ext_node_t	**extlist,
	ext_cmpfunc_t	compare,
	md_error_t	*ep
)
{
	mp_watermark_t	wm;
	mdname_t	*np = NULL;
	mdsetname_t	*spsetp = NULL;
	sp_ext_offset_t	cur_off;
	md_set_desc	*sd;
	int		init = 0;
	mdkey_t		key;
	minor_t		mnum;

	/*
	 * sd is only assigned for a non-local set; its one use below is
	 * guarded by the same metaislocalset() test.
	 */
	if (!metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
	}

	/* offset of the first watermark on the component */
	if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR)
		return (-1);

	for (;;) {
		if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) {
			return (-1);
		}

		/*
		 * get the set and name pointers; spsetp stays NULL when
		 * the watermark names the local set
		 */
		if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) {
			if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) {
				return (-1);
			}
		}

		/*
		 * For the MN set, meta_init_make_device needs to
		 * be run on all the nodes so the entries for the
		 * softpart device name and its comp can be created
		 * in the same order in the replica namespace.  If
		 * we have it run on mdmn_do_iocset then the mddbs
		 * will be out of sync between master node and slave
		 * nodes.
		 */
		if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) {

			if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) {
				md_mn_msg_addmdname_t	*send_params;
				int			result;
				md_mn_result_t		*resp = NULL;
				int			message_size;

				/*
				 * the message is the fixed part plus the
				 * name and its terminating NUL
				 */
				message_size = sizeof (*send_params) +
				    strlen(wm.wm_mdname) + 1;
				send_params = Zalloc(message_size);
				send_params->addmdname_setno = sp->setno;
				(void) strcpy(&send_params->addmdname_name[0],
				    wm.wm_mdname);
				result = mdmn_send_message(sp->setno,
				    MD_MN_MSG_ADDMDNAME,
				    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0,
				    (char *)send_params, message_size, &resp,
				    ep);
				Free(send_params);
				/*
				 * a response, when there is one, is
				 * released on both the failure and the
				 * success path
				 */
				if (resp != NULL) {
					if (resp->mmr_exitval != 0) {
						free_result(resp);
						return (-1);
					}
					free_result(resp);
				}
				if (result != 0)
					return (-1);
			} else {

				/*
				 * remember in init/key that a device was
				 * created here so it can be removed again
				 * if the name lookup below fails
				 */
				if (!is_existing_meta_hsp(sp, wm.wm_mdname)) {
					if ((key = meta_init_make_device(&sp,
					    wm.wm_mdname, ep)) <= 0) {
						return (-1);
					}
					init = 1;
				}
			}

			np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep);
			if (np == NULL) {
				/*
				 * NOTE(review): init is never reset, so
				 * key may stem from an earlier watermark
				 * than the one that failed here - confirm
				 * that this is intended.
				 */
				if (init) {
					if (meta_getnmentbykey(sp->setno,
					    MD_SIDEWILD, key, NULL, &mnum,
					    NULL, ep) != NULL) {
						(void) metaioctl(MD_IOCREM_DEV,
						    &mnum, ep, NULL);
					}
					(void) del_self_name(sp, key, ep);
				}
				return (-1);
			}
		}

		/* insert watermark into extent list */
		meta_sp_list_insert(spsetp, np, extlist, cur_off,
		    wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq,
		    EXTFLG_UPDATE, compare);

		/* if we see the end watermark, we're done */
		if (wm.wm_type == EXTTYP_END)
			break;

		/*
		 * step over the data space and the watermark.
		 * NOTE(review): the literal 1 presumably stands for
		 * MD_SP_WMSIZE, as used for the node length above -
		 * confirm.
		 */
		cur_off += wm.wm_length + 1;

		/* clear out set and name pointers for next iteration */
		np = NULL;
		spsetp = NULL;
	}

	return (0);
}
1955
1956 /*
1957 * **************************************************************************
1958 * Print (metastat) Functions *
1959 * **************************************************************************
1960 */
1961
1962 /*
1963 * FUNCTION: meta_sp_short_print()
1964 * INPUT: msp - the unit structure to display
1965 * fp - the file pointer to send output to
1966 * options - print options from the command line processor
1967 * OUTPUT: ep - return error pointer
1968 * RETURNS: int - -1 if error, 0 on success
1969 * PURPOSE: display a short report of the soft partition in md.tab
1970 * form, primarily used for metastat -p.
1971 */
1972 static int
meta_sp_short_print(md_sp_t * msp,char * fname,FILE * fp,mdprtopts_t options,md_error_t * ep)1973 meta_sp_short_print(
1974 md_sp_t *msp,
1975 char *fname,
1976 FILE *fp,
1977 mdprtopts_t options,
1978 md_error_t *ep
1979 )
1980 {
1981 int extn;
1982
1983 if (options & PRINT_LARGEDEVICES) {
1984 if ((msp->common.revision & MD_64BIT_META_DEV) == 0)
1985 return (0);
1986 }
1987
1988 if (options & PRINT_FN) {
1989 if ((msp->common.revision & MD_FN_META_DEV) == 0)
1990 return (0);
1991 }
1992
1993 /* print name and -p */
1994 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF)
1995 return (mdsyserror(ep, errno, fname));
1996
1997 /* print the component */
1998 /*
1999 * Always print the full path name
2000 */
2001 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF)
2002 return (mdsyserror(ep, errno, fname));
2003
2004 /* print out each extent */
2005 for (extn = 0; (extn < msp->ext.ext_len); extn++) {
2006 md_sp_ext_t *extp = &msp->ext.ext_val[extn];
2007 if (fprintf(fp, " -o %llu -b %llu ", extp->poff,
2008 extp->len) == EOF)
2009 return (mdsyserror(ep, errno, fname));
2010 }
2011
2012 if (fprintf(fp, "\n") == EOF)
2013 return (mdsyserror(ep, errno, fname));
2014
2015 /* success */
2016 return (0);
2017 }
2018
2019 /*
2020 * FUNCTION: meta_sp_status_to_name()
2021 * INPUT: xsp_status - the status value to convert to a string
2022 * tstate - transient errored device state. If set the
2023 * device is Unavailable
2024 * OUTPUT: none
2025 * RETURNS: char * - a pointer to the string representing the status value
2026 * PURPOSE: return an internationalized string representing the
2027 * status value for a soft partition. The strings are
2028 * strdup'd and must be freed by the caller.
2029 */
2030 static char *
meta_sp_status_to_name(xsp_status_t xsp_status,uint_t tstate)2031 meta_sp_status_to_name(
2032 xsp_status_t xsp_status,
2033 uint_t tstate
2034 )
2035 {
2036 char *rval = NULL;
2037
2038 /*
2039 * Check to see if we have MD_INACCESSIBLE set. This is the only valid
2040 * value for an 'Unavailable' return. tstate can be set because of
2041 * other multi-node reasons (e.g. ABR being set)
2042 */
2043 if (tstate & MD_INACCESSIBLE) {
2044 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
2045 }
2046
2047 switch (xsp_status) {
2048 case MD_SP_CREATEPEND:
2049 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating"));
2050 break;
2051 case MD_SP_GROWPEND:
2052 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing"));
2053 break;
2054 case MD_SP_DELPEND:
2055 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting"));
2056 break;
2057 case MD_SP_OK:
2058 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay"));
2059 break;
2060 case MD_SP_ERR:
2061 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored"));
2062 break;
2063 case MD_SP_RECOVER:
2064 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering"));
2065 break;
2066 }
2067
2068 if (rval == NULL)
2069 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));
2070
2071 return (rval);
2072 }
2073
2074 /*
2075 * FUNCTION: meta_sp_report()
2076 * INPUT: sp - the set name for the unit being displayed
2077 * msp - the unit structure to display
2078 * nlpp - pass back the large devs
2079 * fp - the file pointer to send output to
2080 * options - print options from the command line processor
2081 * OUTPUT: ep - return error pointer
2082 * RETURNS: int - -1 if error, 0 on success
2083 * PURPOSE: print a full report of the device specified
2084 */
2085 static int
meta_sp_report(mdsetname_t * sp,md_sp_t * msp,mdnamelist_t ** nlpp,char * fname,FILE * fp,mdprtopts_t options,md_error_t * ep)2086 meta_sp_report(
2087 mdsetname_t *sp,
2088 md_sp_t *msp,
2089 mdnamelist_t **nlpp,
2090 char *fname,
2091 FILE *fp,
2092 mdprtopts_t options,
2093 md_error_t *ep
2094 )
2095 {
2096 uint_t extn;
2097 char *status;
2098 char *devid = "";
2099 mdname_t *didnp = NULL;
2100 ddi_devid_t dtp;
2101 int len;
2102 uint_t tstate = 0;
2103
2104 if (options & PRINT_LARGEDEVICES) {
2105 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) {
2106 return (0);
2107 } else {
2108 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
2109 return (-1);
2110 }
2111 }
2112
2113 if (options & PRINT_FN) {
2114 if ((msp->common.revision & MD_FN_META_DEV) == 0) {
2115 return (0);
2116 } else {
2117 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
2118 return (-1);
2119 }
2120 }
2121
2122 if (options & PRINT_HEADER) {
2123 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
2124 msp->common.namep->cname) == EOF)
2125 return (mdsyserror(ep, errno, fname));
2126 }
2127
2128 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"),
2129 msp->compnamep->cname) == EOF)
2130 return (mdsyserror(ep, errno, fname));
2131
2132 /* Determine if device is available before displaying status */
2133 if (metaismeta(msp->common.namep)) {
2134 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
2135 return (-1);
2136 }
2137 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);
2138
2139 /* print out "State" to be consistent with other metadevices */
2140 if (tstate & MD_ABR_CAP) {
2141 if (fprintf(fp, dgettext(TEXT_DOMAIN,
2142 " State: %s - Application Based Recovery (ABR)\n"),
2143 status) == EOF) {
2144 Free(status);
2145 return (mdsyserror(ep, errno, fname));
2146 }
2147 } else {
2148 if (fprintf(fp, dgettext(TEXT_DOMAIN,
2149 " State: %s\n"), status) == EOF) {
2150 Free(status);
2151 return (mdsyserror(ep, errno, fname));
2152 }
2153 }
2154 free(status);
2155
2156 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"),
2157 msp->common.size,
2158 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
2159 return (mdsyserror(ep, errno, fname));
2160
2161 /* print component details */
2162 if (! metaismeta(msp->compnamep)) {
2163 diskaddr_t start_blk;
2164 int has_mddb;
2165 char *has_mddb_str;
2166
2167 /* print header */
2168 /*
2169 * Building a format string on the fly that will
2170 * be used in (f)printf. This allows the length
2171 * of the ctd to vary from small to large without
2172 * looking horrible.
2173 */
2174 len = strlen(msp->compnamep->cname);
2175 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
2176 len += 2;
2177 if (fprintf(fp,
2178 "\t%-*.*s %-12.12s %-5.5s %s\n",
2179 len, len,
2180 dgettext(TEXT_DOMAIN, "Device"),
2181 dgettext(TEXT_DOMAIN, "Start Block"),
2182 dgettext(TEXT_DOMAIN, "Dbase"),
2183 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
2184 return (mdsyserror(ep, errno, fname));
2185 }
2186
2187
2188 /* get info */
2189 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
2190 MD_DISKADDR_ERROR)
2191 return (-1);
2192
2193 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
2194 return (-1);
2195
2196 if (has_mddb)
2197 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
2198 else
2199 has_mddb_str = dgettext(TEXT_DOMAIN, "No");
2200
2201 /* populate the key in the name_p structure */
2202 didnp = metadevname(&sp, msp->compnamep->dev, ep);
2203 if (didnp == NULL) {
2204 return (-1);
2205 }
2206
2207 /* determine if devid does NOT exist */
2208 if (options & PRINT_DEVID) {
2209 if ((dtp = meta_getdidbykey(sp->setno,
2210 getmyside(sp, ep), didnp->key, ep)) == NULL)
2211 devid = dgettext(TEXT_DOMAIN, "No ");
2212 else {
2213 devid = dgettext(TEXT_DOMAIN, "Yes");
2214 free(dtp);
2215 }
2216 }
2217
2218 /* print info */
2219 /*
2220 * This allows the length
2221 * of the ctd to vary from small to large without
2222 * looking horrible.
2223 */
2224 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n",
2225 len, msp->compnamep->cname,
2226 start_blk, has_mddb_str, devid) == EOF) {
2227 return (mdsyserror(ep, errno, fname));
2228 }
2229 (void) fprintf(fp, "\n");
2230 }
2231
2232
2233 /* print the headers */
2234 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
2235 dgettext(TEXT_DOMAIN, "Extent"),
2236 dgettext(TEXT_DOMAIN, "Start Block"),
2237 dgettext(TEXT_DOMAIN, "Block count")) == EOF)
2238 return (mdsyserror(ep, errno, fname));
2239
2240 /* print out each extent */
2241 for (extn = 0; (extn < msp->ext.ext_len); extn++) {
2242 md_sp_ext_t *extp = &msp->ext.ext_val[extn];
2243
2244 /* If PRINT_TIMES option is ever supported, add output here */
2245 if (fprintf(fp, "\t%6u %24llu %24llu\n",
2246 extn, extp->poff, extp->len) == EOF)
2247 return (mdsyserror(ep, errno, fname));
2248 }
2249
2250 /* separate records with a newline */
2251 (void) fprintf(fp, "\n");
2252 return (0);
2253 }
2254
2255 /*
2256 * FUNCTION: meta_sp_print()
2257 * INPUT: sp - the set name for the unit being displayed
2258 * np - the name of the device to print
2259 * fname - ??? not used
2260 * fp - the file pointer to send output to
2261 * options - print options from the command line processor
2262 * OUTPUT: ep - return error pointer
2263 * RETURNS: int - -1 if error, 0 on success
2264 * PURPOSE: print a full report of the device specified by metastat.
2265 * This is the main entry point for printing.
2266 */
2267 int
meta_sp_print(mdsetname_t * sp,mdname_t * np,mdnamelist_t ** nlpp,char * fname,FILE * fp,mdprtopts_t options,md_error_t * ep)2268 meta_sp_print(
2269 mdsetname_t *sp,
2270 mdname_t *np,
2271 mdnamelist_t **nlpp,
2272 char *fname,
2273 FILE *fp,
2274 mdprtopts_t options,
2275 md_error_t *ep
2276 )
2277 {
2278 md_sp_t *msp;
2279 md_unit_t *mdp;
2280 int rval = 0;
2281 set_t setno;
2282 minor_t unit;
2283
2284 /* should always have the same set */
2285 assert(sp != NULL);
2286
2287 /* print all the soft partitions */
2288 if (np == NULL) {
2289 mdnamelist_t *nlp = NULL;
2290 mdnamelist_t *p;
2291 int cnt;
2292
2293 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0)
2294 return (-1);
2295 else if (cnt == 0)
2296 return (0);
2297
2298 /* recusively print them out */
2299 for (p = nlp; (p != NULL); p = p->next) {
2300 mdname_t *curnp = p->namep;
2301
2302 /*
2303 * one problem with the rval of -1 here is that
2304 * the error gets "lost" when the next device is
2305 * printed, but we want to print them all anyway.
2306 */
2307 rval = meta_sp_print(sp, curnp, nlpp, fname, fp,
2308 options, ep);
2309 }
2310
2311 /* clean up, return success */
2312 metafreenamelist(nlp);
2313 return (rval);
2314 }
2315
2316 /* get the unit structure */
2317 if ((msp = meta_get_sp_common(sp, np,
2318 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
2319 return (-1);
2320
2321 /* check for parented */
2322 if ((! (options & PRINT_SUBDEVS)) &&
2323 (MD_HAS_PARENT(msp->common.parent))) {
2324 return (0);
2325 }
2326
2327 /* print appropriate detail */
2328 if (options & PRINT_SHORT) {
2329 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0)
2330 return (-1);
2331 } else {
2332 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0)
2333 return (-1);
2334 }
2335
2336 /*
2337 * Print underlying metadevices if they are parented to us and
2338 * if the info for the underlying metadevice has not been printed.
2339 */
2340 if (metaismeta(msp->compnamep)) {
2341 /* get the unit structure for the subdevice */
2342 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL)
2343 return (-1);
2344
2345 setno = MD_MIN2SET(MD_SID(mdp));
2346 unit = MD_MIN2UNIT(MD_SID(mdp));
2347
2348 /* If info not already printed, recurse */
2349 if (sp_parent_printed[setno] == NULL ||
2350 !BT_TEST(sp_parent_printed[setno], unit)) {
2351 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp,
2352 (options | PRINT_HEADER | PRINT_SUBDEVS),
2353 NULL, ep) != 0) {
2354 return (-1);
2355 }
2356 if (sp_parent_printed[setno] == NULL)
2357 sp_parent_printed[setno] =
2358 Zalloc(BT_SIZEOFMAP(MD_MAXUNITS));
2359 BT_SET(sp_parent_printed[setno], unit);
2360 }
2361 }
2362 return (0);
2363 }
2364
2365 /*
2366 * **************************************************************************
2367 * Watermark Manipulation Functions *
2368 * **************************************************************************
2369 */
2370
2371 /*
2372 * FUNCTION: meta_sp_get_start()
2373 * INPUT: sp - the operating set
2374 * np - device upon which the sp is being built
2375 * OUTPUT: ep - return error pointer
2376 * RETURNS: daddr_t - -1 if error, otherwise the start block
2377 * PURPOSE: Encapsulate the determination of the start block of the
2378 * device upon which the sp is built or being built.
2379 */
2380 static diskaddr_t
meta_sp_get_start(mdsetname_t * sp,mdname_t * np,md_error_t * ep)2381 meta_sp_get_start(
2382 mdsetname_t *sp,
2383 mdname_t *np,
2384 md_error_t *ep
2385 )
2386 {
2387 daddr_t start_block;
2388
2389 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR)
2390 start_block += MD_SP_START;
2391
2392 return (start_block);
2393 }
2394
2395 /*
2396 * FUNCTION: meta_sp_update_wm_common()
2397 * INPUT: sp - the operating set
2398 * msp - a pointer to the XDR unit structure
2399 * extlist - the extent list specifying watermarks to update
2400 * iocval - either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM
2401 * OUTPUT: ep - return error pointer
2402 * RETURNS: int - -1 if error, 0 on success
2403 * PURPOSE: steps backwards through the extent list updating
2404 * watermarks for all extents with the EXTFLG_UPDATE flag
2405 * set. Writing the watermarks guarantees consistency when
2406 * extents must be broken into pieces since the original
2407 * watermark will be the last to be updated, and will be
2408 * changed to point to a new watermark that is already
2409 * known to be consistent. If one of the writes fails, the
2410 * original watermark stays intact and none of the changes
2411 * are realized.
2412 */
2413 static int
meta_sp_update_wm_common(mdsetname_t * sp,md_sp_t * msp,sp_ext_node_t * extlist,int iocval,md_error_t * ep)2414 meta_sp_update_wm_common(
2415 mdsetname_t *sp,
2416 md_sp_t *msp,
2417 sp_ext_node_t *extlist,
2418 int iocval,
2419 md_error_t *ep
2420 )
2421 {
2422 sp_ext_node_t *ext;
2423 sp_ext_node_t *tail;
2424 mp_watermark_t *wmp, *watermarks;
2425 xsp_offset_t *osp, *offsets;
2426 int update_count = 0;
2427 int rval = 0;
2428 md_unit_t *mdp;
2429 md_sp_update_wm_t update_params;
2430
2431 if (getenv(META_SP_DEBUG)) {
2432 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
2433 meta_sp_list_dump(extlist);
2434 }
2435
2436 /*
2437 * find the last node so we can write the watermarks backwards
2438 * and count watermarks to update so we can allocate space
2439 */
2440 for (ext = extlist; ext != NULL; ext = ext->ext_next) {
2441 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
2442 update_count++;
2443 }
2444
2445 if (ext->ext_next == NULL) {
2446 tail = ext;
2447 }
2448 }
2449 ext = tail;
2450
2451 wmp = watermarks =
2452 Zalloc(update_count * sizeof (mp_watermark_t));
2453 osp = offsets =
2454 Zalloc(update_count * sizeof (sp_ext_offset_t));
2455
2456 while (ext != NULL) {
2457 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
2458 /* update watermark */
2459 wmp->wm_magic = MD_SP_MAGIC;
2460 wmp->wm_version = MD_SP_VERSION;
2461 wmp->wm_type = ext->ext_type;
2462 wmp->wm_seq = ext->ext_seq;
2463 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;
2464
2465 /* fill in the volume name and set name */
2466 if (ext->ext_namep != NULL)
2467 (void) strcpy(wmp->wm_mdname,
2468 ext->ext_namep->cname);
2469 else
2470 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
2471 if (ext->ext_setp != NULL &&
2472 ext->ext_setp->setno != MD_LOCAL_SET)
2473 (void) strcpy(wmp->wm_setname,
2474 ext->ext_setp->setname);
2475 else
2476 (void) strcpy(wmp->wm_setname,
2477 MD_SP_LOCALSETNAME);
2478
2479 /* Generate the checksum */
2480 wmp->wm_checksum = 0;
2481 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
2482 sizeof (*wmp), NULL);
2483
2484 /* record the extent offset */
2485 *osp = ext->ext_offset;
2486
2487 /* Advance the placeholders */
2488 osp++; wmp++;
2489 }
2490 ext = ext->ext_prev;
2491 }
2492
2493 mdp = meta_get_mdunit(sp, msp->common.namep, ep);
2494 if (mdp == NULL) {
2495 rval = -1;
2496 goto out;
2497 }
2498
2499 (void) memset(&update_params, 0, sizeof (update_params));
2500 update_params.mnum = MD_SID(mdp);
2501 update_params.count = update_count;
2502 update_params.wmp = (uintptr_t)watermarks;
2503 update_params.osp = (uintptr_t)offsets;
2504 MD_SETDRIVERNAME(&update_params, MD_SP,
2505 MD_MIN2SET(update_params.mnum));
2506
2507 if (metaioctl(iocval, &update_params, &update_params.mde,
2508 msp->common.namep->cname) != 0) {
2509 (void) mdstealerror(ep, &update_params.mde);
2510 rval = -1;
2511 goto out;
2512 }
2513
2514 out:
2515 Free(watermarks);
2516 Free(offsets);
2517
2518 return (rval);
2519 }
2520
2521 static int
meta_sp_update_wm(mdsetname_t * sp,md_sp_t * msp,sp_ext_node_t * extlist,md_error_t * ep)2522 meta_sp_update_wm(
2523 mdsetname_t *sp,
2524 md_sp_t *msp,
2525 sp_ext_node_t *extlist,
2526 md_error_t *ep
2527 )
2528 {
2529 return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM,
2530 ep));
2531 }
2532
2533 static int
meta_mn_sp_update_wm(mdsetname_t * sp,md_sp_t * msp,sp_ext_node_t * extlist,md_error_t * ep)2534 meta_mn_sp_update_wm(
2535 mdsetname_t *sp,
2536 md_sp_t *msp,
2537 sp_ext_node_t *extlist,
2538 md_error_t *ep
2539 )
2540 {
2541 return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM,
2542 ep));
2543 }
2544
2545 /*
2546 * FUNCTION: meta_sp_clear_wm()
2547 * INPUT: sp - the operating set
2548 * msp - the unit structure for the soft partition to clear
2549 * OUTPUT: ep - return error pointer
2550 * RETURNS: int - -1 if error, 0 on success
2551 * PURPOSE: steps through the extents for a soft partition unit and
2552 * creates an extent list designed to mark all of the
2553 * watermarks for those extents as free. The extent list
2554 * is then passed to meta_sp_update_wm() to actually write
2555 * the watermarks out.
2556 */
2557 static int
meta_sp_clear_wm(mdsetname_t * sp,md_sp_t * msp,md_error_t * ep)2558 meta_sp_clear_wm(
2559 mdsetname_t *sp,
2560 md_sp_t *msp,
2561 md_error_t *ep
2562 )
2563 {
2564 sp_ext_node_t *extlist = NULL;
2565 int numexts = msp->ext.ext_len;
2566 uint_t i;
2567 int rval = 0;
2568
2569 /* for each watermark must set the flag to SP_FREE */
2570 for (i = 0; i < numexts; i++) {
2571 md_sp_ext_t *extp = &msp->ext.ext_val[i];
2572
2573 meta_sp_list_insert(NULL, NULL, &extlist,
2574 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
2575 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
2576 }
2577
2578 /* update watermarks */
2579 rval = meta_sp_update_wm(sp, msp, extlist, ep);
2580
2581 meta_sp_list_free(&extlist);
2582 return (rval);
2583 }
2584
2585 /*
2586 * FUNCTION: meta_sp_read_wm()
2587 * INPUT: sp - setname for component
2588 * compnp - mdname_t for component
2589 * offset - the offset of the watermark to read (sectors)
2590 * OUTPUT: wm - the watermark structure to read into
2591 * ep - return error pointer
2592 * RETURNS: int - -1 if error, 0 on success
2593 * PURPOSE: seeks out to the requested offset and reads a watermark.
2594 * It then verifies that the magic number is correct and
2595 * that the checksum is valid, returning an error if either
2596 * is wrong.
2597 */
2598 static int
meta_sp_read_wm(mdsetname_t * sp,mdname_t * compnp,mp_watermark_t * wm,sp_ext_offset_t offset,md_error_t * ep)2599 meta_sp_read_wm(
2600 mdsetname_t *sp,
2601 mdname_t *compnp,
2602 mp_watermark_t *wm,
2603 sp_ext_offset_t offset,
2604 md_error_t *ep
2605 )
2606 {
2607 md_sp_read_wm_t read_params;
2608
2609 /*
2610 * make sure block offset does not overflow 2^64 bytes and it's a
2611 * multiple of the block size.
2612 */
2613 assert(offset <= (1LL << (64 - DEV_BSHIFT)));
2614 /* LINTED */
2615 assert((sizeof (*wm) % DEV_BSIZE) == 0);
2616
2617 (void) memset(wm, 0, sizeof (*wm));
2618
2619 (void) memset(&read_params, 0, sizeof (read_params));
2620 read_params.rdev = compnp->dev;
2621 read_params.wmp = (uintptr_t)wm;
2622 read_params.offset = offset;
2623 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno);
2624
2625 if (metaioctl(MD_IOC_SPREADWM, &read_params,
2626 &read_params.mde, compnp->cname) != 0) {
2627
2628 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2629 "Extent header read failed, block %llu.\n"), offset);
2630 return (mdstealerror(ep, &read_params.mde));
2631 }
2632
2633 /* make sure magic number is correct */
2634 if (wm->wm_magic != MD_SP_MAGIC) {
2635 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2636 "found incorrect magic number %x, expected %x.\n"),
2637 wm->wm_magic, MD_SP_MAGIC);
2638 /*
2639 * Pass NULL for the device name as we don't have
2640 * valid watermark contents.
2641 */
2642 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL));
2643 }
2644
2645 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
2646 sizeof (*wm), NULL)) {
2647 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2648 "found incorrect checksum %x.\n"),
2649 wm->wm_checksum);
2650 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname));
2651 }
2652
2653 return (0);
2654 }
2655
2656 /*
2657 * **************************************************************************
2658 * Query Functions
2659 * **************************************************************************
2660 */
2661
2662 /*
2663 * IMPORTANT NOTE: This is a static function that assumes that
2664 * its input parameters have been checked and
2665 * have valid values that lie within acceptable
2666 * ranges.
2667 *
2668 * FUNCTION: meta_sp_enough_space()
2669 * INPUT: desired_number_of_sps - the number of soft partitions desired;
2670 * must be > 0
2671 * desired_sp_size - the desired soft partition size in blocks;
2672 * must be > 0
2673 * extent_listpp - a reference to a reference to an extent
2674 * list that lists the extents on a device;
2675 * must be a reference to a reference to a
2676 * valid extent list
2677 * alignment - the desired data space alignment for the sp's
2678 * OUTPUT: boolean_t return value
2679 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent
2680 * list to create the desired soft partitions,
2681 * B_FALSE if there's not enough space
2682 * PURPOSE: determines whether there's enough free space in an extent
2683 * list to allow creation of a set of soft partitions
2684 */
2685 static boolean_t
meta_sp_enough_space(int desired_number_of_sps,blkcnt_t desired_sp_size,sp_ext_node_t ** extent_listpp,sp_ext_length_t alignment)2686 meta_sp_enough_space(
2687 int desired_number_of_sps,
2688 blkcnt_t desired_sp_size,
2689 sp_ext_node_t **extent_listpp,
2690 sp_ext_length_t alignment
2691 )
2692 {
2693 boolean_t enough_space;
2694 int number_of_sps;
2695 int number_of_extents_used;
2696 sp_ext_length_t desired_ext_length = desired_sp_size;
2697
2698 enough_space = B_TRUE;
2699 number_of_sps = 0;
2700 while ((enough_space == B_TRUE) &&
2701 (number_of_sps < desired_number_of_sps)) {
2702 /*
2703 * Use the extent allocation algorithm implemented by
2704 * meta_sp_alloc_by_len() to test whether the free
2705 * extents in the extent list referenced by *extent_listpp
2706 * contain enough space to accomodate a soft partition
2707 * of size desired_ext_length.
2708 *
2709 * Repeat the test <desired_number_of_sps> times
2710 * or until it fails, whichever comes first,
2711 * each time allocating the extents required to
2712 * create the soft partition without actually
2713 * creating the soft partition.
2714 */
2715 number_of_extents_used = meta_sp_alloc_by_len(
2716 TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2717 extent_listpp, &desired_ext_length,
2718 NO_OFFSET, alignment);
2719 if (number_of_extents_used == -1) {
2720 enough_space = B_FALSE;
2721 } else {
2722 number_of_sps++;
2723 }
2724 }
2725 return (enough_space);
2726 }
2727
2728 /*
2729 * IMPORTANT NOTE: This is a static function that calls other functions
2730 * that check its mdsetnamep and device_mdnamep
2731 * input parameters, but expects extent_listpp to
2732 * be a initialized to a valid address to which
2733 * it can write a reference to the extent list that
2734 * it creates.
2735 *
2736 * FUNCTION: meta_sp_get_extent_list()
2737 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
2738 * for the set containing the device for
2739 * which the extents are to be listed
2740 * device_mdnamep - a reference to the mdname_t structure
2741 * for the device for which the extents
2742 * are to be listed
2743 * OUTPUT: *extent_listpp - a reference to the extent list for
2744 * the device; NULL if the function fails
2745 * *ep - the libmeta error encountered, if any
2746 * RETURNS: boolean_t - B_TRUE if the function call was successful,
2747 * B_FALSE if not
2748 * PURPOSE: gets the extent list for a device
2749 */
2750 static boolean_t
meta_sp_get_extent_list(mdsetname_t * mdsetnamep,mdname_t * device_mdnamep,sp_ext_node_t ** extent_listpp,md_error_t * ep)2751 meta_sp_get_extent_list(
2752 mdsetname_t *mdsetnamep,
2753 mdname_t *device_mdnamep,
2754 sp_ext_node_t **extent_listpp,
2755 md_error_t *ep
2756 )
2757 {
2758 diskaddr_t device_size_in_blocks;
2759 mdnamelist_t *sp_name_listp;
2760 diskaddr_t start_block_address_in_blocks;
2761
2762 *extent_listpp = NULL;
2763 sp_name_listp = NULL;
2764
2765 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep,
2766 device_mdnamep, ep);
2767 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) {
2768 if (getenv(META_SP_DEBUG)) {
2769 mde_perror(ep,
2770 "meta_sp_get_extent_list:meta_sp_get_start");
2771 }
2772 return (B_FALSE);
2773 }
2774
2775 device_size_in_blocks = metagetsize(device_mdnamep, ep);
2776 if (device_size_in_blocks == MD_DISKADDR_ERROR) {
2777 if (getenv(META_SP_DEBUG)) {
2778 mde_perror(ep,
2779 "meta_sp_get_extent_list:metagetsize");
2780 }
2781 return (B_FALSE);
2782 }
2783
2784 /*
2785 * Sanity check: the start block will have skipped an integer
2786 * number of cylinders, C. C will usually be zero. If (C > 0),
2787 * and the disk slice happens to only be C cylinders in total
2788 * size, we'll fail this check.
2789 */
2790 if (device_size_in_blocks <=
2791 (start_block_address_in_blocks + MD_SP_WMSIZE)) {
2792 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname);
2793 return (B_FALSE);
2794 }
2795
2796 /*
2797 * After this point, we will have allocated resources, so any
2798 * failure returns must be through the supplied "fail" label
2799 * to properly deallocate things.
2800 */
2801
2802 /*
2803 * Create an empty extent list that starts one watermark past
2804 * the start block of the device and ends one watermark before
2805 * the end of the device.
2806 */
2807 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2808 extent_listpp, NO_OFFSET,
2809 (sp_ext_length_t)start_block_address_in_blocks,
2810 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
2811 meta_sp_cmp_by_offset);
2812 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2813 extent_listpp, (sp_ext_offset_t)(device_size_in_blocks -
2814 MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER,
2815 NO_FLAGS, meta_sp_cmp_by_offset);
2816
2817 /*
2818 * Get the list of soft partitions that are already on the
2819 * device.
2820 */
2821 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep,
2822 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) {
2823 if (getenv(META_SP_DEBUG)) {
2824 mde_perror(ep,
2825 "meta_sp_get_extent_list:meta_sp_get_by_component");
2826 }
2827 goto fail;
2828 }
2829
2830 if (sp_name_listp != NULL) {
2831 /*
2832 * If there are soft partitions on the device, add the
2833 * extents used in them to the extent list.
2834 */
2835 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp,
2836 extent_listpp, ep) == -1) {
2837 if (getenv(META_SP_DEBUG)) {
2838 mde_perror(ep, "meta_sp_get_extent_list:"
2839 "meta_sp_extlist_from_namelist");
2840 }
2841 goto fail;
2842 }
2843 metafreenamelist(sp_name_listp);
2844 }
2845
2846 /*
2847 * Add free extents to the extent list to represent
2848 * the remaining regions of free space on the
2849 * device.
2850 */
2851 meta_sp_list_freefill(extent_listpp, device_size_in_blocks);
2852 return (B_TRUE);
2853
2854 fail:
2855 if (sp_name_listp != NULL) {
2856 metafreenamelist(sp_name_listp);
2857 }
2858
2859 if (*extent_listpp != NULL) {
2860 /*
2861 * meta_sp_list_free sets *extent_listpp to NULL.
2862 */
2863 meta_sp_list_free(extent_listpp);
2864 }
2865 return (B_FALSE);
2866 }
2867
2868 /*
2869 * IMPORTANT NOTE: This is a static function that calls other functions
2870 * that check its mdsetnamep and mddrivenamep
2871 * input parameters, but expects extent_listpp to
2872 * be a initialized to a valid address to which
2873 * it can write a reference to the extent list that
2874 * it creates.
2875 *
2876 * FUNCTION: meta_sp_get_extent_list_for_drive()
2877 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
2878 * for the set containing the drive for
2879 * which the extents are to be listed
2880 * mddrivenamep - a reference to the mddrivename_t structure
2881 * for the drive for which the extents
2882 * are to be listed
2883 * OUTPUT: *extent_listpp - a reference to the extent list for
2884 * the drive; NULL if the function fails
2885 * RETURNS: boolean_t - B_TRUE if the function call was successful,
2886 * B_FALSE if not
2887 * PURPOSE: gets the extent list for a drive when the entire drive
2888 * is to be soft partitioned
2889 */
2890 static boolean_t
meta_sp_get_extent_list_for_drive(mdsetname_t * mdsetnamep,mddrivename_t * mddrivenamep,sp_ext_node_t ** extent_listpp)2891 meta_sp_get_extent_list_for_drive(
2892 mdsetname_t *mdsetnamep,
2893 mddrivename_t *mddrivenamep,
2894 sp_ext_node_t **extent_listpp
2895 )
2896 {
2897 boolean_t can_use;
2898 diskaddr_t free_space;
2899 md_error_t mderror;
2900 mdvtoc_t proposed_vtoc;
2901 int repartition_options;
2902 int return_value;
2903 md_sp_t test_sp_struct;
2904
2905 can_use = B_TRUE;
2906 *extent_listpp = NULL;
2907 mderror = mdnullerror;
2908 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0,
2909 &mderror);
2910 if (test_sp_struct.compnamep == NULL) {
2911 can_use = B_FALSE;
2912 }
2913
2914 if (can_use == B_TRUE) {
2915 mderror = mdnullerror;
2916 repartition_options = 0;
2917 return_value = meta_check_sp(mdsetnamep, &test_sp_struct,
2918 MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror);
2919 if (return_value != 0) {
2920 can_use = B_FALSE;
2921 }
2922 }
2923
2924 if (can_use == B_TRUE) {
2925 mderror = mdnullerror;
2926 repartition_options = repartition_options |
2927 (MD_REPART_FORCE | MD_REPART_DONT_LABEL);
2928 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep,
2929 repartition_options, &proposed_vtoc, &mderror);
2930 if (return_value != 0) {
2931 can_use = B_FALSE;
2932 }
2933 }
2934
2935 if (can_use == B_TRUE) {
2936 free_space = proposed_vtoc.parts[MD_SLICE0].size;
2937 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) {
2938 can_use = B_FALSE;
2939 }
2940 }
2941
2942 if (can_use == B_TRUE) {
2943 /*
2944 * Create an extent list that starts with
2945 * a reserved extent that ends at the start
2946 * of the usable space on slice zero of the
2947 * proposed VTOC, ends with an extent that
2948 * reserves space for a watermark at the end
2949 * of slice zero, and contains a single free
2950 * extent that occupies the rest of the space
2951 * on the slice.
2952 *
2953 * NOTE:
2954 *
2955 * Don't use metagetstart() or metagetsize() to
2956 * find the usable space. They query the mdname_t
2957 * structure that represents an actual device to
2958 * determine the amount of space on the device that
2959 * contains metadata and the total amount of space
2960 * on the device. Since this function creates a
2961 * proposed extent list that doesn't reflect the
2962 * state of an actual device, there's no mdname_t
2963 * structure to be queried.
2964 *
2965 * When a drive is reformatted to prepare for
2966 * soft partitioning, all of slice seven is
2967 * reserved for metadata, all of slice zero is
2968 * available for soft partitioning, and all other
2969 * slices on the drive are empty. The proposed
2970 * extent list for the drive therefore contains
2971 * only three extents: a reserved extent that ends
2972 * at the start of the usable space on slice zero,
2973 * a single free extent that occupies all the usable
2974 * space on slice zero, and an ending extent that
2975 * reserves space for a watermark at the end of
2976 * slice zero.
2977 */
2978 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2979 extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START),
2980 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
2981 meta_sp_cmp_by_offset);
2982 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2983 extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE),
2984 MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS,
2985 meta_sp_cmp_by_offset);
2986 meta_sp_list_freefill(extent_listpp, free_space);
2987 }
2988 return (can_use);
2989 }
2990
2991 /*
2992 * FUNCTION: meta_sp_can_create_sps()
2993 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
2994 * for the set containing the device for
2995 * which the extents are to be listed
2996 * mdnamep - a reference to the mdname_t of the device
2997 * on which the soft parititions are to be created
2998 * number_of_sps - the desired number of soft partitions
2999 * sp_size - the desired soft partition size
3000 * OUTPUT: boolean_t return value
3001 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created,
3002 * B_FALSE if not
3003 * PURPOSE: determines whether a set of soft partitions can be created
3004 * on a device
3005 */
3006 boolean_t
meta_sp_can_create_sps(mdsetname_t * mdsetnamep,mdname_t * mdnamep,int number_of_sps,blkcnt_t sp_size)3007 meta_sp_can_create_sps(
3008 mdsetname_t *mdsetnamep,
3009 mdname_t *mdnamep,
3010 int number_of_sps,
3011 blkcnt_t sp_size
3012 )
3013 {
3014 sp_ext_node_t *extent_listp;
3015 boolean_t succeeded;
3016 md_error_t mde;
3017
3018 if ((number_of_sps > 0) && (sp_size > 0)) {
3019 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
3020 &extent_listp, &mde);
3021 } else {
3022 succeeded = B_FALSE;
3023 }
3024
3025 /*
3026 * We don't really care about an error return from the
3027 * alignment call; that will just result in passing zero,
3028 * which will be interpreted as no alignment.
3029 */
3030
3031 if (succeeded == B_TRUE) {
3032 succeeded = meta_sp_enough_space(number_of_sps,
3033 sp_size, &extent_listp,
3034 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde));
3035 meta_sp_list_free(&extent_listp);
3036 }
3037 return (succeeded);
3038 }
3039
3040 /*
3041 * FUNCTION: meta_sp_can_create_sps_on_drive()
3042 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3043 * for the set containing the drive for
3044 * which the extents are to be listed
3045 * mddrivenamep - a reference to the mddrivename_t of the drive
3046 * on which the soft parititions are to be created
3047 * number_of_sps - the desired number of soft partitions
3048 * sp_size - the desired soft partition size
3049 * OUTPUT: boolean_t return value
3050 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created,
3051 * B_FALSE if not
3052 * PURPOSE: determines whether a set of soft partitions can be created
3053 * on a drive if the entire drive is soft partitioned
3054 */
3055 boolean_t
meta_sp_can_create_sps_on_drive(mdsetname_t * mdsetnamep,mddrivename_t * mddrivenamep,int number_of_sps,blkcnt_t sp_size)3056 meta_sp_can_create_sps_on_drive(
3057 mdsetname_t *mdsetnamep,
3058 mddrivename_t *mddrivenamep,
3059 int number_of_sps,
3060 blkcnt_t sp_size
3061 )
3062 {
3063 sp_ext_node_t *extent_listp;
3064 boolean_t succeeded;
3065
3066 if ((number_of_sps > 0) && (sp_size > 0)) {
3067 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3068 mddrivenamep, &extent_listp);
3069 } else {
3070 succeeded = B_FALSE;
3071 }
3072
3073 /*
3074 * We don't care about alignment on the space call because
3075 * we're specifically dealing with a drive, which will have no
3076 * inherent alignment.
3077 */
3078
3079 if (succeeded == B_TRUE) {
3080 succeeded = meta_sp_enough_space(number_of_sps, sp_size,
3081 &extent_listp, SP_UNALIGNED);
3082 meta_sp_list_free(&extent_listp);
3083 }
3084 return (succeeded);
3085 }
3086
3087 /*
3088 * FUNCTION: meta_sp_get_free_space()
3089 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3090 * for the set containing the device for
3091 * which the free space is to be returned
3092 * mdnamep - a reference to the mdname_t of the device
3093 * for which the free space is to be returned
3094 * OUTPUT: blkcnt_t return value
3095 * RETURNS: blkcnt_t - the number of blocks of free space on the device
3096 * PURPOSE: returns the number of blocks of free space on a device
3097 */
3098 blkcnt_t
meta_sp_get_free_space(mdsetname_t * mdsetnamep,mdname_t * mdnamep)3099 meta_sp_get_free_space(
3100 mdsetname_t *mdsetnamep,
3101 mdname_t *mdnamep
3102 )
3103 {
3104 sp_ext_node_t *extent_listp;
3105 sp_ext_length_t free_blocks;
3106 boolean_t succeeded;
3107 md_error_t mde;
3108
3109 extent_listp = NULL;
3110 free_blocks = 0;
3111 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
3112 &extent_listp, &mde);
3113 if (succeeded == B_TRUE) {
3114 free_blocks = meta_sp_list_size(extent_listp,
3115 EXTTYP_FREE, INCLUDE_WM);
3116 meta_sp_list_free(&extent_listp);
3117 if (free_blocks > (10 * MD_SP_WMSIZE)) {
3118 /*
3119 * Subtract a safety margin for watermarks when
3120 * computing the number of blocks available for
3121 * use. The actual number of watermarks can't
3122 * be calculated without knowing the exact numbers
3123 * and sizes of both the free extents and the soft
3124 * partitions to be created. The calculation is
3125 * highly complex and error-prone even if those
3126 * quantities are known. The approximate value
3127 * 10 * MD_SP_WMSIZE is within a few blocks of the
3128 * correct value in all practical cases.
3129 */
3130 free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
3131 } else {
3132 free_blocks = 0;
3133 }
3134 } else {
3135 mdclrerror(&mde);
3136 }
3137
3138 return (free_blocks);
3139 }
3140
3141 /*
3142 * FUNCTION: meta_sp_get_free_space_on_drive()
3143 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3144 * for the set containing the drive for
3145 * which the free space is to be returned
3146 * mddrivenamep - a reference to the mddrivename_t of the drive
3147 * for which the free space is to be returned
3148 * OUTPUT: blkcnt_t return value
3149 * RETURNS: blkcnt_t - the number of blocks of free space on the drive
3150 * PURPOSE: returns the number of blocks of space usable for soft
3151 * partitions on an entire drive, if the entire drive is
3152 * soft partitioned
3153 */
3154 blkcnt_t
meta_sp_get_free_space_on_drive(mdsetname_t * mdsetnamep,mddrivename_t * mddrivenamep)3155 meta_sp_get_free_space_on_drive(
3156 mdsetname_t *mdsetnamep,
3157 mddrivename_t *mddrivenamep
3158 )
3159 {
3160 sp_ext_node_t *extent_listp;
3161 sp_ext_length_t free_blocks;
3162 boolean_t succeeded;
3163
3164 extent_listp = NULL;
3165 free_blocks = 0;
3166 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3167 mddrivenamep, &extent_listp);
3168 if (succeeded == B_TRUE) {
3169 free_blocks = meta_sp_list_size(extent_listp,
3170 EXTTYP_FREE, INCLUDE_WM);
3171 meta_sp_list_free(&extent_listp);
3172 if (free_blocks > (10 * MD_SP_WMSIZE)) {
3173 /*
3174 * Subtract a safety margin for watermarks when
3175 * computing the number of blocks available for
3176 * use. The actual number of watermarks can't
3177 * be calculated without knowing the exact numbers
3178 * and sizes of both the free extents and the soft
3179 * partitions to be created. The calculation is
3180 * highly complex and error-prone even if those
3181 * quantities are known. The approximate value
3182 * 10 * MD_SP_WMSIZE is within a few blocks of the
3183 * correct value in all practical cases.
3184 */
3185 free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
3186 } else {
3187 free_blocks = 0;
3188 }
3189 }
3190 return (free_blocks);
3191 }
3192
3193 /*
3194 * FUNCTION: meta_sp_get_number_of_possible_sps()
3195 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3196 * for the set containing the device for
3197 * which the number of possible soft partitions
3198 * is to be returned
3199 * mdnamep - a reference to the mdname_t of the device
3200 * for which the number of possible soft partitions
3201 * is to be returned
3202 * OUTPUT: int return value
3203 * RETURNS: int - the number of soft partitions of the desired size
3204 * that can be created on the device
3205 * PURPOSE: returns the number of soft partitions of a given size
3206 * that can be created on a device
3207 */
3208 int
meta_sp_get_number_of_possible_sps(mdsetname_t * mdsetnamep,mdname_t * mdnamep,blkcnt_t sp_size)3209 meta_sp_get_number_of_possible_sps(
3210 mdsetname_t *mdsetnamep,
3211 mdname_t *mdnamep,
3212 blkcnt_t sp_size
3213 )
3214 {
3215 sp_ext_node_t *extent_listp;
3216 int number_of_possible_sps;
3217 boolean_t succeeded;
3218 md_error_t mde;
3219 sp_ext_length_t alignment;
3220
3221 extent_listp = NULL;
3222 number_of_possible_sps = 0;
3223 if (sp_size > 0) {
3224 if ((succeeded = meta_sp_get_extent_list(mdsetnamep,
3225 mdnamep, &extent_listp, &mde)) == B_FALSE)
3226 mdclrerror(&mde);
3227 } else {
3228 succeeded = B_FALSE;
3229 }
3230
3231 if (succeeded == B_TRUE) {
3232 alignment = meta_sp_get_default_alignment(mdsetnamep,
3233 mdnamep, &mde);
3234 }
3235
3236 while (succeeded == B_TRUE) {
3237 /*
3238 * Keep allocating space from the extent list
3239 * for soft partitions of the desired size until
3240 * there's not enough free space left in the list
3241 * for another soft partiition of that size.
3242 * Add one to the number of possible soft partitions
3243 * for each soft partition for which there is
3244 * enough free space left.
3245 */
3246 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
3247 sp_size, &extent_listp, alignment);
3248 if (succeeded == B_TRUE) {
3249 number_of_possible_sps++;
3250 }
3251 }
3252 if (extent_listp != NULL) {
3253 meta_sp_list_free(&extent_listp);
3254 }
3255 return (number_of_possible_sps);
3256 }
3257
3258 /*
3259 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive()
3260 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3261 * for the set containing the drive for
3262 * which the number of possible soft partitions
3263 * is to be returned
3264 * mddrivenamep - a reference to the mddrivename_t of the drive
3265 * for which the number of possible soft partitions
3266 * is to be returned
3267 * sp_size - the size in blocks of the proposed soft partitions
3268 * OUTPUT: int return value
3269 * RETURNS: int - the number of soft partitions of the desired size
3270 * that can be created on the drive
3271 * PURPOSE: returns the number of soft partitions of a given size
3272 * that can be created on a drive, if the entire drive is
3273 * soft partitioned
3274 */
3275 int
meta_sp_get_number_of_possible_sps_on_drive(mdsetname_t * mdsetnamep,mddrivename_t * mddrivenamep,blkcnt_t sp_size)3276 meta_sp_get_number_of_possible_sps_on_drive(
3277 mdsetname_t *mdsetnamep,
3278 mddrivename_t *mddrivenamep,
3279 blkcnt_t sp_size
3280 )
3281 {
3282 sp_ext_node_t *extent_listp;
3283 int number_of_possible_sps;
3284 boolean_t succeeded;
3285
3286 extent_listp = NULL;
3287 number_of_possible_sps = 0;
3288 if (sp_size > 0) {
3289 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3290 mddrivenamep, &extent_listp);
3291 } else {
3292 succeeded = B_FALSE;
3293 }
3294 while (succeeded == B_TRUE) {
3295 /*
3296 * Keep allocating space from the extent list
3297 * for soft partitions of the desired size until
3298 * there's not enough free space left in the list
3299 * for another soft partition of that size.
3300 * Add one to the number of possible soft partitions
3301 * for each soft partition for which there is
3302 * enough free space left.
3303 *
3304 * Since it's a drive, not a metadevice, make no
3305 * assumptions about alignment.
3306 */
3307 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
3308 sp_size, &extent_listp, SP_UNALIGNED);
3309 if (succeeded == B_TRUE) {
3310 number_of_possible_sps++;
3311 }
3312 }
3313 if (extent_listp != NULL) {
3314 meta_sp_list_free(&extent_listp);
3315 }
3316 return (number_of_possible_sps);
3317 }
3318
3319 /*
3320 * FUNCTION: meta_sp_get_possible_sp_size()
3321 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3322 * for the set containing the device for
3323 * which the possible soft partition size
3324 * is to be returned
3325 * mdnamep - a reference to the mdname_t of the device
3326 * for which the possible soft partition size
3327 * is to be returned
3328 * number_of_sps - the desired number of soft partitions
3329 * OUTPUT: blkcnt_t return value
3330 * RETURNS: blkcnt_t - the possible soft partition size in blocks
3331 * PURPOSE: returns the maximum possible size of each of a given number of
3332 * soft partitions of equal size that can be created on a device
3333 */
3334 blkcnt_t
meta_sp_get_possible_sp_size(mdsetname_t * mdsetnamep,mdname_t * mdnamep,int number_of_sps)3335 meta_sp_get_possible_sp_size(
3336 mdsetname_t *mdsetnamep,
3337 mdname_t *mdnamep,
3338 int number_of_sps
3339 )
3340 {
3341 blkcnt_t free_blocks;
3342 blkcnt_t sp_size;
3343 boolean_t succeeded;
3344
3345 sp_size = 0;
3346 if (number_of_sps > 0) {
3347 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep);
3348 sp_size = free_blocks / number_of_sps;
3349 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
3350 number_of_sps, sp_size);
3351 while ((succeeded == B_FALSE) && (sp_size > 0)) {
3352 /*
3353 * To compensate for space that may have been
3354 * occupied by watermarks, reduce sp_size by a
3355 * number of blocks equal to the number of soft
3356 * partitions desired, and test again to see
3357 * whether the desired number of soft partitions
3358 * can be created.
3359 */
3360 sp_size = sp_size - ((blkcnt_t)number_of_sps);
3361 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
3362 number_of_sps, sp_size);
3363 }
3364 if (sp_size < 0) {
3365 sp_size = 0;
3366 }
3367 }
3368 return (sp_size);
3369 }
3370
3371 /*
3372 * FUNCTION: meta_sp_get_possible_sp_size_on_drive()
3373 * INPUT: mdsetnamep - a reference to the mdsetname_t structure
3374 * for the set containing the drive for
3375 * which the possible soft partition size
3376 * is to be returned
3377 * mddrivenamep - a reference to the mddrivename_t of the drive
3378 * for which the possible soft partition size
3379 * is to be returned
3380 * number_of_sps - the desired number of soft partitions
3381 * OUTPUT: blkcnt_t return value
3382 * RETURNS: blkcnt_t - the possible soft partition size in blocks
3383 * PURPOSE: returns the maximum possible size of each of a given number of
3384 * soft partitions of equal size that can be created on a drive
3385 * if the entire drive is soft partitioned
3386 */
3387 blkcnt_t
meta_sp_get_possible_sp_size_on_drive(mdsetname_t * mdsetnamep,mddrivename_t * mddrivenamep,int number_of_sps)3388 meta_sp_get_possible_sp_size_on_drive(
3389 mdsetname_t *mdsetnamep,
3390 mddrivename_t *mddrivenamep,
3391 int number_of_sps
3392 )
3393 {
3394 blkcnt_t free_blocks;
3395 blkcnt_t sp_size;
3396 boolean_t succeeded;
3397
3398 sp_size = 0;
3399 if (number_of_sps > 0) {
3400 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep,
3401 mddrivenamep);
3402 sp_size = free_blocks / number_of_sps;
3403 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
3404 mddrivenamep, number_of_sps, sp_size);
3405 while ((succeeded == B_FALSE) && (sp_size > 0)) {
3406 /*
3407 * To compensate for space that may have been
3408 * occupied by watermarks, reduce sp_size by a
3409 * number of blocks equal to the number of soft
3410 * partitions desired, and test again to see
3411 * whether the desired number of soft partitions
3412 * can be created.
3413 */
3414 sp_size = sp_size - ((blkcnt_t)number_of_sps);
3415 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
3416 mddrivenamep, number_of_sps, sp_size);
3417 }
3418 if (sp_size < 0) {
3419 sp_size = 0;
3420 }
3421 }
3422 return (sp_size);
3423 }
3424
3425 /*
3426 * **************************************************************************
3427 * Unit Structure Manipulation Functions *
3428 * **************************************************************************
3429 */
3430
3431 /*
3432 * FUNCTION: meta_sp_fillextarray()
3433 * INPUT: mp - the unit structure to fill
3434 * extlist - the list of extents to fill with
3435 * OUTPUT: none
3436 * RETURNS: void
3437 * PURPOSE: fills in the unit structure extent list with the extents
3438 * specified by extlist. Only extents in extlist with the
3439 * EXTFLG_UPDATE flag are changed in the unit structure,
3440 * and the index into the unit structure is the sequence
3441 * number in the extent list. After all of the nodes have
3442 * been updated the virtual offsets in the unit structure
3443 * are updated to reflect the new lengths.
3444 */
3445 static void
meta_sp_fillextarray(mp_unit_t * mp,sp_ext_node_t * extlist)3446 meta_sp_fillextarray(
3447 mp_unit_t *mp,
3448 sp_ext_node_t *extlist
3449 )
3450 {
3451 int i;
3452 sp_ext_node_t *ext;
3453 sp_ext_offset_t curvoff = 0LL;
3454
3455 assert(mp != NULL);
3456
3457 /* go through the allocation list and fill in our unit structure */
3458 for (ext = extlist; ext != NULL; ext = ext->ext_next) {
3459 if ((ext->ext_type == EXTTYP_ALLOC) &&
3460 (ext->ext_flags & EXTFLG_UPDATE) != 0) {
3461 mp->un_ext[ext->ext_seq].un_poff =
3462 ext->ext_offset + MD_SP_WMSIZE;
3463 mp->un_ext[ext->ext_seq].un_len =
3464 ext->ext_length - MD_SP_WMSIZE;
3465 }
3466 }
3467
3468 for (i = 0; i < mp->un_numexts; i++) {
3469 assert(mp->un_ext[i].un_poff != 0);
3470 assert(mp->un_ext[i].un_len != 0);
3471 mp->un_ext[i].un_voff = curvoff;
3472 curvoff += mp->un_ext[i].un_len;
3473 }
3474 }
3475
3476 /*
3477 * FUNCTION: meta_sp_createunit()
3478 * INPUT: np - the name of the device to create a unit structure for
3479 * compnp - the name of the device the soft partition is on
3480 * extlist - the extent list to populate the new unit with
3481 * numexts - the number of extents in the extent list
3482 * len - the total size of the soft partition (sectors)
3483 * status - the initial status of the unit structure
3484 * OUTPUT: ep - return error pointer
3485 * RETURNS: mp_unit_t * - the new unit structure.
3486 * PURPOSE: allocates and fills in a new soft partition unit
3487 * structure to be passed to the soft partitioning driver
3488 * for creation.
3489 */
3490 static mp_unit_t *
meta_sp_createunit(mdname_t * np,mdname_t * compnp,sp_ext_node_t * extlist,int numexts,sp_ext_length_t len,sp_status_t status,md_error_t * ep)3491 meta_sp_createunit(
3492 mdname_t *np,
3493 mdname_t *compnp,
3494 sp_ext_node_t *extlist,
3495 int numexts,
3496 sp_ext_length_t len,
3497 sp_status_t status,
3498 md_error_t *ep
3499 )
3500 {
3501 mp_unit_t *mp;
3502 uint_t ms_size;
3503
3504 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) +
3505 (numexts * sizeof (mp->un_ext[0]));
3506
3507 mp = Zalloc(ms_size);
3508
3509 /* fill in fields in common unit structure */
3510 mp->c.un_type = MD_METASP;
3511 mp->c.un_size = ms_size;
3512 MD_SID(mp) = meta_getminor(np->dev);
3513 mp->c.un_total_blocks = len;
3514 mp->c.un_actual_tb = len;
3515
3516 /* set up geometry */
3517 (void) meta_sp_setgeom(np, compnp, mp, ep);
3518
3519 /* if we're building on metadevice we can't parent */
3520 if (metaismeta(compnp))
3521 MD_CAPAB(mp) = MD_CANT_PARENT;
3522 else
3523 MD_CAPAB(mp) = MD_CAN_PARENT;
3524
3525 /* fill soft partition-specific fields */
3526 mp->un_dev = compnp->dev;
3527 mp->un_key = compnp->key;
3528
3529 /* mdname_t start_blk field is not 64-bit! */
3530 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk;
3531 mp->un_status = status;
3532 mp->un_numexts = numexts;
3533 mp->un_length = len;
3534
3535 /* fill in the extent array */
3536 meta_sp_fillextarray(mp, extlist);
3537
3538 return (mp);
3539 }
3540
3541 /*
3542 * FUNCTION: meta_sp_updateunit()
3543 * INPUT: np - name structure for the metadevice being updated
3544 * old_un - the original unit structure that is being updated
3545 * extlist - the extent list to populate the new unit with
3546 * grow_len - the amount by which the partition is being grown
3547 * numexts - the number of extents in the extent list
3548 * ep - return error pointer
3549 * OUTPUT: none
3550 * RETURNS: mp_unit_t * - the updated unit structure
3551 * PURPOSE: allocates and fills in a new soft partition unit structure to
3552 * be passed to the soft partitioning driver for creation. The
3553 * old unit structure is first copied in, and then the updated
3554 * extents are changed in the new unit structure. This is
3555 * typically used when the size of an existing unit is changed.
3556 */
3557 static mp_unit_t *
meta_sp_updateunit(mdname_t * np,mp_unit_t * old_un,sp_ext_node_t * extlist,sp_ext_length_t grow_len,int numexts,md_error_t * ep)3558 meta_sp_updateunit(
3559 mdname_t *np,
3560 mp_unit_t *old_un,
3561 sp_ext_node_t *extlist,
3562 sp_ext_length_t grow_len,
3563 int numexts,
3564 md_error_t *ep
3565 )
3566 {
3567 mp_unit_t *new_un;
3568 sp_ext_length_t new_len;
3569 uint_t new_size;
3570
3571 assert(old_un != NULL);
3572 assert(extlist != NULL);
3573
3574 /* allocate new unit structure and copy in old unit */
3575 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) +
3576 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0]));
3577 new_len = old_un->un_length + grow_len;
3578 new_un = Zalloc(new_size);
3579 bcopy(old_un, new_un, old_un->c.un_size);
3580
3581 /* update size and geometry information */
3582 new_un->c.un_size = new_size;
3583 new_un->un_length = new_len;
3584 new_un->c.un_total_blocks = new_len;
3585 new_un->c.un_actual_tb = new_len;
3586 if (meta_adjust_geom((md_unit_t *)new_un, np,
3587 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct,
3588 0, ep) != 0) {
3589 Free(new_un);
3590 return (NULL);
3591 }
3592
3593 /* update extent information */
3594 new_un->un_numexts += numexts;
3595
3596 meta_sp_fillextarray(new_un, extlist);
3597
3598 return (new_un);
3599 }
3600
3601 /*
3602 * FUNCTION: meta_get_sp()
3603 * INPUT: sp - the set name for the device to get
3604 * np - the name of the device to get
3605 * OUTPUT: ep - return error pointer
3606 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition
3607 * PURPOSE: interface to the rest of libmeta for fetching a unit structure
3608 * for the named device. Just a wrapper for meta_get_sp_common().
3609 */
3610 md_sp_t *
meta_get_sp(mdsetname_t * sp,mdname_t * np,md_error_t * ep)3611 meta_get_sp(
3612 mdsetname_t *sp,
3613 mdname_t *np,
3614 md_error_t *ep
3615 )
3616 {
3617 return (meta_get_sp_common(sp, np, 0, ep));
3618 }
3619
3620 /*
3621 * FUNCTION: meta_get_sp_common()
3622 * INPUT: sp - the set name for the device to get
3623 * np - the name of the device to get
3624 * fast - whether to use the cache or not (NOT IMPLEMENTED!)
3625 * OUTPUT: ep - return error pointer
3626 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition,
3627 * NULL if np is not a soft partition
3628 * PURPOSE: common routine for fetching a soft partition unit structure
3629 */
3630 md_sp_t *
meta_get_sp_common(mdsetname_t * sp,mdname_t * np,int fast,md_error_t * ep)3631 meta_get_sp_common(
3632 mdsetname_t *sp,
3633 mdname_t *np,
3634 int fast,
3635 md_error_t *ep
3636 )
3637 {
3638 mddrivename_t *dnp = np->drivenamep;
3639 char *miscname;
3640 mp_unit_t *mp;
3641 md_sp_t *msp;
3642 int i;
3643
3644 /* must have set */
3645 assert(sp != NULL);
3646
3647 /* short circuit */
3648 if (dnp->unitp != NULL) {
3649 if (dnp->unitp->type != MD_METASP)
3650 return (NULL);
3651 return ((md_sp_t *)dnp->unitp);
3652 }
3653 /* get miscname and unit */
3654 if ((miscname = metagetmiscname(np, ep)) == NULL)
3655 return (NULL);
3656
3657 if (strcmp(miscname, MD_SP) != 0) {
3658 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname);
3659 return (NULL);
3660 }
3661
3662 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
3663 return (NULL);
3664
3665 assert(mp->c.un_type == MD_METASP);
3666
3667 /* allocate soft partition */
3668 msp = Zalloc(sizeof (*msp));
3669
3670 /* get the common information */
3671 msp->common.namep = np;
3672 msp->common.type = mp->c.un_type;
3673 msp->common.state = mp->c.un_status;
3674 msp->common.capabilities = mp->c.un_capabilities;
3675 msp->common.parent = mp->c.un_parent;
3676 msp->common.size = mp->c.un_total_blocks;
3677 msp->common.user_flags = mp->c.un_user_flags;
3678 msp->common.revision = mp->c.un_revision;
3679
3680 /* get soft partition information */
3681 if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL)
3682 goto out;
3683
3684 /*
3685 * Fill in the key and the start block. Note that the start
3686 * block in the unit structure is 64 bits but the name pointer
3687 * only supports 32 bits.
3688 */
3689 msp->compnamep->key = mp->un_key;
3690 msp->compnamep->start_blk = mp->un_start_blk;
3691
3692 /* fill in status field */
3693 msp->status = mp->un_status;
3694
3695 /* allocate the extents */
3696 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val));
3697 msp->ext.ext_len = mp->un_numexts;
3698
3699 /* do the extents for this soft partition */
3700 for (i = 0; i < mp->un_numexts; i++) {
3701 struct mp_ext *mde = &mp->un_ext[i];
3702 md_sp_ext_t *extp = &msp->ext.ext_val[i];
3703
3704 extp->voff = mde->un_voff;
3705 extp->poff = mde->un_poff;
3706 extp->len = mde->un_len;
3707 }
3708
3709 /* cleanup, return success */
3710 Free(mp);
3711 dnp->unitp = (md_common_t *)msp;
3712 return (msp);
3713
3714 out:
3715 /* clean up and return error */
3716 Free(mp);
3717 Free(msp);
3718 return (NULL);
3719 }
3720
3721
3722 /*
3723 * FUNCTION: meta_init_sp()
3724 * INPUT: spp - the set name for the new device
3725 * argc - the remaining argument count for the metainit cmdline
3726 * argv - the remainder of the unparsed command line
3727 * options - global options parsed by metainit
3728 * OUTPUT: ep - return error pointer
3729 * RETURNS: int - -1 failure, 0 success
3730 * PURPOSE: provides the command line parsing and name management overhead
3731 * for creating a new soft partition. Ultimately this calls
3732 * meta_create_sp() which does the real work of allocating space
3733 * for the new soft partition.
3734 */
3735 int
meta_init_sp(mdsetname_t ** spp,int argc,char * argv[],mdcmdopts_t options,md_error_t * ep)3736 meta_init_sp(
3737 mdsetname_t **spp,
3738 int argc,
3739 char *argv[],
3740 mdcmdopts_t options,
3741 md_error_t *ep
3742 )
3743 {
3744 char *compname = NULL;
3745 mdname_t *spcompnp = NULL; /* name of component volume */
3746 char *devname = argv[0]; /* unit name */
3747 mdname_t *np = NULL; /* name of soft partition */
3748 md_sp_t *msp = NULL;
3749 int c;
3750 int old_optind;
3751 sp_ext_length_t len = 0LL;
3752 int rval = -1;
3753 uint_t seq;
3754 int oflag;
3755 int failed;
3756 mddrivename_t *dnp = NULL;
3757 sp_ext_length_t alignment = 0LL;
3758 sp_ext_node_t *extlist = NULL;
3759
3760 assert(argc > 0);
3761
3762 /* expect sp name, -p, optional -e, compname, and size parameters */
3763 /* grab soft partition name */
3764 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL)
3765 goto out;
3766
3767 /* see if it exists already */
3768 if (metagetmiscname(np, ep) != NULL) {
3769 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
3770 meta_getminor(np->dev), devname);
3771 goto out;
3772 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
3773 goto out;
3774 } else {
3775 mdclrerror(ep);
3776 }
3777 --argc, ++argv;
3778
3779 if (argc == 0)
3780 goto syntax;
3781
3782 /* grab -p */
3783 if (strcmp(argv[0], "-p") != 0)
3784 goto syntax;
3785 --argc, ++argv;
3786
3787 if (argc == 0)
3788 goto syntax;
3789
3790 /* see if -e is there */
3791 if (strcmp(argv[0], "-e") == 0) {
3792 /* use the whole disk */
3793 options |= MDCMD_USE_WHOLE_DISK;
3794 --argc, ++argv;
3795 }
3796
3797 if (argc == 0)
3798 goto syntax;
3799
3800 /* get component name */
3801 compname = Strdup(argv[0]);
3802
3803 if (options & MDCMD_USE_WHOLE_DISK) {
3804 if ((dnp = metadrivename(spp, compname, ep)) == NULL) {
3805 goto out;
3806 }
3807 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) {
3808 goto out;
3809 }
3810 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) {
3811 goto out;
3812 }
3813 assert(*spp != NULL);
3814
3815 if (!(options & MDCMD_NOLOCK)) {
3816 /* grab set lock */
3817 if (meta_lock(*spp, TRUE, ep))
3818 goto out;
3819
3820 if (meta_check_ownership(*spp, ep) != 0)
3821 goto out;
3822 }
3823
3824 /* allocate the soft partition */
3825 msp = Zalloc(sizeof (*msp));
3826
3827 /* setup common */
3828 msp->common.namep = np;
3829 msp->common.type = MD_METASP;
3830
3831 compname = spcompnp->cname;
3832
3833 assert(spcompnp->rname != NULL);
3834 --argc, ++argv;
3835
3836 if (argc == 0) {
3837 goto syntax;
3838 }
3839
3840 if (*argv[0] == '-') {
3841 /*
3842 * parse any other command line options, this includes
3843 * the recovery options -o and -b. The special thing
3844 * with these options is that the len needs to be
3845 * kept track of otherwise when the geometry of the
3846 * "device" is built it will create an invalid geometry
3847 */
3848 old_optind = optind = 0;
3849 opterr = 0;
3850 oflag = 0;
3851 seq = 0;
3852 failed = 0;
3853 while ((c = getopt(argc, argv, "A:o:b:")) != -1) {
3854 sp_ext_offset_t offset;
3855 sp_ext_length_t length;
3856 longlong_t tmp_size;
3857
3858 switch (c) {
3859 case 'A': /* data alignment */
3860 if (meta_sp_parsesizestring(optarg,
3861 &alignment) == -1) {
3862 failed = 1;
3863 }
3864 break;
3865 case 'o': /* offset in the partition */
3866 if (oflag == 1) {
3867 failed = 1;
3868 } else {
3869 tmp_size = atoll(optarg);
3870 if (tmp_size <= 0) {
3871 failed = 1;
3872 } else {
3873 oflag = 1;
3874 options |= MDCMD_DIRECT;
3875
3876 offset = tmp_size;
3877 }
3878 }
3879
3880 break;
3881 case 'b': /* number of blocks */
3882 if (oflag == 0) {
3883 failed = 1;
3884 } else {
3885 tmp_size = atoll(optarg);
3886 if (tmp_size <= 0) {
3887 failed = 1;
3888 } else {
3889 oflag = 0;
3890
3891 length = tmp_size;
3892
3893 /* we have a pair of values */
3894 meta_sp_list_insert(*spp, np,
3895 &extlist, offset, length,
3896 EXTTYP_ALLOC, seq++,
3897 EXTFLG_UPDATE,
3898 meta_sp_cmp_by_offset);
3899 len += length;
3900 }
3901 }
3902
3903 break;
3904 default:
3905 argc -= old_optind;
3906 argv += old_optind;
3907 goto options;
3908 }
3909
3910 if (failed) {
3911 argc -= old_optind;
3912 argv += old_optind;
3913 goto syntax;
3914 }
3915
3916 old_optind = optind;
3917 }
3918 argc -= optind;
3919 argv += optind;
3920
3921 /*
3922 * Must have matching pairs of -o and -b flags
3923 */
3924 if (oflag != 0)
3925 goto syntax;
3926
3927 /*
3928 * Can't specify both layout (indicated indirectly by
3929 * len being set by thye -o/-b cases above) AND
3930 * alignment
3931 */
3932 if ((len > 0LL) && (alignment > 0LL))
3933 goto syntax;
3934
3935 /*
3936 * sanity check the allocation list
3937 */
3938 if ((extlist != NULL) && meta_sp_list_overlaps(extlist))
3939 goto syntax;
3940 }
3941
3942 if (len == 0LL) {
3943 if (argc == 0)
3944 goto syntax;
3945 if (meta_sp_parsesize(argv[0], &len) == -1)
3946 goto syntax;
3947 --argc, ++argv;
3948 }
3949
3950 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val));
3951 msp->ext.ext_val->len = len;
3952 msp->compnamep = spcompnp;
3953
3954 /* we should be at the end */
3955 if (argc != 0)
3956 goto syntax;
3957
3958 /* create soft partition */
3959 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0)
3960 goto out;
3961 rval = 0;
3962
3963 /* let em know */
3964 if (options & MDCMD_PRINT) {
3965 (void) printf(dgettext(TEXT_DOMAIN,
3966 "%s: Soft Partition is setup\n"),
3967 devname);
3968 (void) fflush(stdout);
3969 }
3970 goto out;
3971
3972 syntax:
3973 /* syntax error */
3974 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv);
3975 goto out;
3976
3977 options:
3978 /* options error */
3979 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv);
3980 goto out;
3981
3982 out:
3983 if (msp != NULL) {
3984 if (msp->ext.ext_val != NULL) {
3985 Free(msp->ext.ext_val);
3986 }
3987 Free(msp);
3988 }
3989
3990 return (rval);
3991 }
3992
3993 /*
3994 * FUNCTION: meta_free_sp()
3995 * INPUT: msp - the soft partition unit to free
3996 * OUTPUT: none
3997 * RETURNS: void
3998 * PURPOSE: provides an interface from the rest of libmeta for freeing a
3999 * soft partition unit
4000 */
4001 void
meta_free_sp(md_sp_t * msp)4002 meta_free_sp(md_sp_t *msp)
4003 {
4004 Free(msp);
4005 }
4006
4007 /*
4008 * FUNCTION: meta_sp_issp()
4009 * INPUT: sp - the set name to check
4010 * np - the name to check
4011 * OUTPUT: ep - return error pointer
4012 * RETURNS: int - 0 means sp,np is a soft partition
4013 * 1 means sp,np is not a soft partition
4014 * PURPOSE: determines whether the given device is a soft partition
4015 * device. This is called by other metadevice check routines.
4016 */
4017 int
meta_sp_issp(mdsetname_t * sp,mdname_t * np,md_error_t * ep)4018 meta_sp_issp(
4019 mdsetname_t *sp,
4020 mdname_t *np,
4021 md_error_t *ep
4022 )
4023 {
4024 if (meta_get_sp_common(sp, np, 0, ep) == NULL)
4025 return (1);
4026
4027 return (0);
4028 }
4029
4030 /*
4031 * FUNCTION: meta_check_sp()
4032 * INPUT: sp - the set name to check
4033 * msp - the unit structure to check
4034 * options - creation options
4035 * OUTPUT: repart_options - options to be passed to
4036 * meta_repartition_drive()
4037 * ep - return error pointer
4038 * RETURNS: int - 0 ok to create on this component
4039 * -1 error or not ok to create on this component
4040 * PURPOSE: Checks to determine whether the rules for creation of
4041 * soft partitions allow creation of a soft partition on
4042 * the device described by the mdname_t structure referred
4043 * to by msp->compnamep.
4044 *
4045 * NOTE: Does NOT check to determine whether the extents
4046 * described in the md_sp_t structure referred to by
4047 * msp will fit on the device described by the mdname_t
4048 * structure located at msp->compnamep.
4049 */
4050 static int
meta_check_sp(mdsetname_t * sp,md_sp_t * msp,mdcmdopts_t options,int * repart_options,md_error_t * ep)4051 meta_check_sp(
4052 mdsetname_t *sp,
4053 md_sp_t *msp,
4054 mdcmdopts_t options,
4055 int *repart_options,
4056 md_error_t *ep
4057 )
4058 {
4059 md_common_t *mdp;
4060 mdname_t *compnp = msp->compnamep;
4061 uint_t slice;
4062 mddrivename_t *dnp;
4063 mdname_t *slicenp;
4064 mdvtoc_t *vtocp;
4065
4066 /* make sure it is in the set */
4067 if (meta_check_inset(sp, compnp, ep) != 0)
4068 return (-1);
4069
4070 if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
4071 uint_t rep_slice;
4072
4073 /*
4074 * check to make sure we can partition this drive.
4075 * we cannot continue if any of the following are
4076 * true:
4077 * The drive is a metadevice.
4078 * The drive contains a mounted slice.
4079 * The drive contains a slice being swapped to.
4080 * The drive contains slices which are part of other
4081 * metadevices.
4082 * The drive contains a metadb.
4083 */
4084 if (metaismeta(compnp))
4085 return (mddeverror(ep, MDE_IS_META, compnp->dev,
4086 compnp->cname));
4087
4088 assert(compnp->drivenamep != NULL);
4089
4090 /*
4091 * ensure that we have slice 0 since the disk will be
4092 * repartitioned in the USE_WHOLE_DISK case. this check
4093 * is redundant unless the user incorrectly specifies a
4094 * a fully qualified drive AND slice name (i.e.,
4095 * /dev/dsk/cXtXdXsX), which will be incorrectly
4096 * recognized as a drive name by the metaname code.
4097 */
4098
4099 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL)
4100 return (-1);
4101 if (slice != MD_SLICE0)
4102 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname));
4103
4104 dnp = compnp->drivenamep;
4105 if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
4106 return (-1);
4107
4108 for (slice = 0; slice < vtocp->nparts; slice++) {
4109
4110 /* only check if the slice really exists */
4111 if (vtocp->parts[slice].size == 0)
4112 continue;
4113
4114 slicenp = metaslicename(dnp, slice, ep);
4115 if (slicenp == NULL)
4116 return (-1);
4117
4118 /* check to ensure that it is not already in use */
4119 if (meta_check_inuse(sp,
4120 slicenp, MDCHK_INUSE, ep) != 0) {
4121 return (-1);
4122 }
4123
4124 /*
4125 * Up to this point, tests are applied to all
4126 * slices uniformly.
4127 */
4128
4129 if (slice == rep_slice) {
4130 /*
4131 * Tests inside the body of this
4132 * conditional are applied only to
4133 * slice seven.
4134 */
4135 if (meta_check_inmeta(sp, slicenp,
4136 options | MDCHK_ALLOW_MDDB |
4137 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0)
4138 return (-1);
4139
4140 /*
4141 * For slice seven, a metadb is NOT an
4142 * automatic failure. It merely means
4143 * that we're not allowed to muck
4144 * about with the partitioning of that
4145 * slice. We indicate this by masking
4146 * in the MD_REPART_LEAVE_REP flag.
4147 */
4148 if (metahasmddb(sp, slicenp, ep)) {
4149 assert(repart_options !=
4150 NULL);
4151 *repart_options |=
4152 MD_REPART_LEAVE_REP;
4153 }
4154
4155 /*
4156 * Skip the remaining tests for slice
4157 * seven
4158 */
4159 continue;
4160 }
4161
4162 /*
4163 * Tests below this point will be applied to
4164 * all slices EXCEPT for the replica slice.
4165 */
4166
4167
4168 /* check if component is in a metadevice */
4169 if (meta_check_inmeta(sp, slicenp, options, 0,
4170 -1, ep) != 0)
4171 return (-1);
4172
4173 /* check to see if component has a metadb */
4174 if (metahasmddb(sp, slicenp, ep))
4175 return (mddeverror(ep, MDE_HAS_MDDB,
4176 slicenp->dev, slicenp->cname));
4177 }
4178 /*
4179 * This should be all of the testing necessary when
4180 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of
4181 * meta_check_sp() is oriented towards component
4182 * arguments instead of disks.
4183 */
4184 goto meta_check_sp_ok;
4185
4186 }
4187
4188 /* check to ensure that it is not already in use */
4189 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) {
4190 return (-1);
4191 }
4192
4193 if (!metaismeta(compnp)) { /* handle non-metadevices */
4194
4195 /*
4196 * The component can have one or more soft partitions on it
4197 * already, but can't be part of any other type of metadevice,
4198 * so if it is used for a metadevice, but the metadevice
4199 * isn't a soft partition, return failure.
4200 */
4201
4202 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 &&
4203 meta_check_insp(sp, compnp, 0, -1, ep) == 0) {
4204 return (-1);
4205 }
4206 } else { /* handle metadevices */
4207 /* get underlying unit & check capabilities */
4208 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL)
4209 return (-1);
4210
4211 if ((! (mdp->capabilities & MD_CAN_PARENT)) ||
4212 (! (mdp->capabilities & MD_CAN_SP)))
4213 return (mdmderror(ep, MDE_INVAL_UNIT,
4214 meta_getminor(compnp->dev), compnp->cname));
4215 }
4216
4217 meta_check_sp_ok:
4218 mdclrerror(ep);
4219 return (0);
4220 }
4221
4222 /*
4223 * FUNCTION: meta_create_sp()
4224 * INPUT: sp - the set name to create in
4225 * msp - the unit structure to create
4226 * oblist - an optional list of requested extents (-o/-b options)
4227 * options - creation options
4228 * alignment - data alignment
4229 * OUTPUT: ep - return error pointer
4230 * RETURNS: int - 0 success, -1 error
4231 * PURPOSE: does most of the work for creating a soft partition. If
4232 * metainit -p -e was used, first partition the drive. Then
4233 * create an extent list based on the existing soft partitions
4234 * and assume all space not used by them is free. Storage for
4235 * the new soft partition is allocated from the free extents
4236 * based on the length specified on the command line or the
4237 * oblist passed in. The unit structure is then committed and
4238 * the watermarks are updated. Finally, the status is changed to
4239 * Okay and the process is complete.
4240 */
4241 static int
meta_create_sp(mdsetname_t * sp,md_sp_t * msp,sp_ext_node_t * oblist,mdcmdopts_t options,sp_ext_length_t alignment,md_error_t * ep)4242 meta_create_sp(
4243 mdsetname_t *sp,
4244 md_sp_t *msp,
4245 sp_ext_node_t *oblist,
4246 mdcmdopts_t options,
4247 sp_ext_length_t alignment,
4248 md_error_t *ep
4249 )
4250 {
4251 mdname_t *np = msp->common.namep;
4252 mdname_t *compnp = msp->compnamep;
4253 mp_unit_t *mp = NULL;
4254 mdnamelist_t *keynlp = NULL, *spnlp = NULL;
4255 md_set_params_t set_params;
4256 int rval = -1;
4257 diskaddr_t comp_size;
4258 diskaddr_t sp_start;
4259 sp_ext_node_t *extlist = NULL;
4260 int numexts = 0; /* number of extents */
4261 int count = 0;
4262 int committed = 0;
4263 int repart_options = MD_REPART_FORCE;
4264 int create_flag = MD_CRO_32BIT;
4265 int mn_set_master = 0;
4266
4267 md_set_desc *sd;
4268 md_set_mmown_params_t *ownpar = NULL;
4269 int comp_is_mirror = 0;
4270
4271 /* validate soft partition */
4272 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0)
4273 return (-1);
4274
4275 if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
4276 if ((options & MDCMD_DOIT) != 0) {
4277 if (meta_repartition_drive(sp,
4278 compnp->drivenamep,
4279 repart_options,
4280 NULL, /* Don't return the VTOC */
4281 ep) != 0)
4282
4283 return (-1);
4284 } else {
4285 /*
4286 * If -n and -e are both specified, it doesn't make
4287 * sense to continue without actually partitioning
4288 * the drive.
4289 */
4290 return (0);
4291 }
4292 }
4293
4294 /* populate the start_blk field of the component name */
4295 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) ==
4296 MD_DISKADDR_ERROR) {
4297 rval = -1;
4298 goto out;
4299 }
4300
4301 if (options & MDCMD_DOIT) {
4302 /* store name in namespace */
4303 if (add_key_name(sp, compnp, &keynlp, ep) != 0) {
4304 rval = -1;
4305 goto out;
4306 }
4307 }
4308
4309 /*
4310 * Get a list of the soft partitions that currently reside on
4311 * the component. We should ALWAYS force reload the cache,
4312 * because if this is a single creation, there will not BE a
4313 * cached list, and if we're using the md.tab, we must rebuild
4314 * the list because it won't contain the previous (if any)
4315 * soft partition.
4316 */
4317 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
4318 if (count < 0) {
4319 /* error occured */
4320 rval = -1;
4321 goto out;
4322 }
4323
4324 /*
4325 * get the size of the underlying device. if the size is smaller
4326 * than or equal to the watermark size, we know there isn't
4327 * enough space.
4328 */
4329 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
4330 rval = -1;
4331 goto out;
4332 } else if (comp_size <= MD_SP_WMSIZE) {
4333 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname);
4334 rval = -1;
4335 goto out;
4336 }
4337 /*
4338 * seed extlist with reserved space at the beginning of the volume and
4339 * enough space for the end watermark. The end watermark always gets
4340 * updated, but if the underlying device changes size it may not be
4341 * pointed to until the extent before it is updated. Since the
4342 * end of the reserved space is where the first watermark starts,
4343 * the reserved extent should never be marked for updating.
4344 */
4345
4346 meta_sp_list_insert(NULL, NULL, &extlist,
4347 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
4348 meta_sp_list_insert(NULL, NULL, &extlist,
4349 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE,
4350 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
4351
4352 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
4353 rval = -1;
4354 goto out;
4355 }
4356
4357 metafreenamelist(spnlp);
4358
4359 if (getenv(META_SP_DEBUG)) {
4360 meta_sp_debug("meta_create_sp: list of used extents:\n");
4361 meta_sp_list_dump(extlist);
4362 }
4363
4364 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
4365
4366 /* get extent list from -o/-b options or from free space */
4367 if (options & MDCMD_DIRECT) {
4368 if (getenv(META_SP_DEBUG)) {
4369 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n");
4370 meta_sp_list_dump(oblist);
4371 }
4372
4373 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist);
4374 if (numexts == -1) {
4375 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname);
4376 rval = -1;
4377 goto out;
4378 }
4379 } else {
4380 numexts = meta_sp_alloc_by_len(sp, np, &extlist,
4381 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment :
4382 meta_sp_get_default_alignment(sp, compnp, ep));
4383 if (numexts == -1) {
4384 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
4385 rval = -1;
4386 goto out;
4387 }
4388 }
4389
4390 assert(extlist != NULL);
4391
4392 /* create soft partition */
4393 mp = meta_sp_createunit(msp->common.namep, msp->compnamep,
4394 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep);
4395
4396 create_flag = meta_check_devicesize(mp->c.un_total_blocks);
4397
4398 /* if we're not doing anything (metainit -n), return success */
4399 if (! (options & MDCMD_DOIT)) {
4400 rval = 0; /* success */
4401 goto out;
4402 }
4403
4404 (void) memset(&set_params, 0, sizeof (set_params));
4405
4406 if (create_flag == MD_CRO_64BIT) {
4407 mp->c.un_revision |= MD_64BIT_META_DEV;
4408 set_params.options = MD_CRO_64BIT;
4409 } else {
4410 mp->c.un_revision &= ~MD_64BIT_META_DEV;
4411 set_params.options = MD_CRO_32BIT;
4412 }
4413
4414 if (getenv(META_SP_DEBUG)) {
4415 meta_sp_debug("meta_create_sp: printing unit structure\n");
4416 meta_sp_printunit(mp);
4417 }
4418
4419 /*
4420 * Check to see if we're trying to create a partition on a mirror. If so
4421 * we may have to enforce an ownership change before writing the
4422 * watermark out.
4423 */
4424 if (metaismeta(compnp)) {
4425 char *miscname;
4426
4427 miscname = metagetmiscname(compnp, ep);
4428 if (miscname != NULL)
4429 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0);
4430 else
4431 comp_is_mirror = 0;
4432 } else {
4433 comp_is_mirror = 0;
4434 }
4435
4436 /*
4437 * For a multi-node environment we have to ensure that the master
4438 * node owns an underlying mirror before we issue the MD_IOCSET ioctl.
4439 * If the master does not own the device we will deadlock as the
4440 * implicit write of the watermarks (in sp_ioctl.c) will cause an
4441 * ownership change that will block as the MD_IOCSET is still in
4442 * progress. To close this window we force an owner change to occur
4443 * before issuing the MD_IOCSET. We cannot simply open the device and
4444 * write to it as this will only work for the first soft-partition
4445 * creation.
4446 */
4447
4448 if (comp_is_mirror && !metaislocalset(sp)) {
4449
4450 if ((sd = metaget_setdesc(sp, ep)) == NULL) {
4451 rval = -1;
4452 goto out;
4453 }
4454 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) {
4455 mn_set_master = 1;
4456 }
4457 }
4458
4459 set_params.mnum = MD_SID(mp);
4460 set_params.size = mp->c.un_size;
4461 set_params.mdp = (uintptr_t)mp;
4462 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum));
4463
4464 /* first phase of commit. */
4465 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
4466 np->cname) != 0) {
4467 (void) mdstealerror(ep, &set_params.mde);
4468 rval = -1;
4469 goto out;
4470 }
4471
4472 /* we've successfully committed the record */
4473 committed = 1;
4474
4475 /* write watermarks */
4476 /*
4477 * Special-case for Multi-node sets. As we now have a distributed DRL
4478 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case
4479 * unless we use a 'special' MN-capable ioctl to stage the watermark
4480 * update. This only affects the master-node in an MN set.
4481 */
4482 if (mn_set_master) {
4483 if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) {
4484 rval = -1;
4485 goto out;
4486 }
4487 } else {
4488 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
4489 rval = -1;
4490 goto out;
4491 }
4492 }
4493
4494 /* second phase of commit, set status to MD_SP_OK */
4495 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) {
4496 rval = -1;
4497 goto out;
4498 }
4499 rval = 0;
4500 out:
4501 Free(mp);
4502 if (ownpar)
4503 Free(ownpar);
4504
4505 if (extlist != NULL)
4506 meta_sp_list_free(&extlist);
4507
4508 if (rval != 0 && keynlp != NULL && committed != 1)
4509 (void) del_key_names(sp, keynlp, NULL);
4510
4511 metafreenamelist(keynlp);
4512
4513 return (rval);
4514 }
4515
4516 /*
4517 * **************************************************************************
4518 * Reset (metaclear) Functions *
4519 * **************************************************************************
4520 */
4521
4522 /*
4523 * FUNCTION: meta_sp_reset_common()
4524 * INPUT: sp - the set name of the device to reset
4525 * np - the name of the device to reset
4526 * msp - the unit structure to reset
4527 * options - metaclear options
4528 * OUTPUT: ep - return error pointer
4529 * RETURNS: int - 0 success, -1 error
4530 * PURPOSE: "resets", or more accurately deletes, the soft partition
4531 * specified. First the state is set to "deleting" and then the
4532 * watermarks are all cleared out. Once the watermarks have been
4533 * updated, the unit structure is deleted from the metadb.
4534 */
4535 static int
meta_sp_reset_common(mdsetname_t * sp,mdname_t * np,md_sp_t * msp,md_sp_reset_t reset_params,mdcmdopts_t options,md_error_t * ep)4536 meta_sp_reset_common(
4537 mdsetname_t *sp,
4538 mdname_t *np,
4539 md_sp_t *msp,
4540 md_sp_reset_t reset_params,
4541 mdcmdopts_t options,
4542 md_error_t *ep
4543 )
4544 {
4545 char *miscname;
4546 int rval = -1;
4547 int is_open = 0;
4548
4549 /* make sure that nobody owns us */
4550 if (MD_HAS_PARENT(msp->common.parent))
4551 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev),
4552 np->cname));
4553
4554 /* make sure that the soft partition isn't open */
4555 if ((is_open = meta_isopen(sp, np, ep, options)) < 0)
4556 return (-1);
4557 else if (is_open)
4558 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
4559 np->cname));
4560
4561 /* get miscname */
4562 if ((miscname = metagetmiscname(np, ep)) == NULL)
4563 return (-1);
4564
4565 /* fill in reset params */
4566 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno);
4567 reset_params.mnum = meta_getminor(np->dev);
4568 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0;
4569
4570 /*
4571 * clear soft partition - phase one.
4572 * place the soft partition into the "delete pending" state.
4573 */
4574 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0)
4575 return (-1);
4576
4577 /*
4578 * Now clear the watermarks. If the force flag is specified,
4579 * ignore any errors writing the watermarks and delete the unit
4580 * structure anyway. An error may leave the on-disk format in a
4581 * corrupt state. If force is not specified and we fail here,
4582 * the soft partition will remain in the "delete pending" state.
4583 */
4584 if ((meta_sp_clear_wm(sp, msp, ep) < 0) &&
4585 ((options & MDCMD_FORCE) == 0))
4586 goto out;
4587
4588 /*
4589 * clear soft partition - phase two.
4590 * the driver removes the soft partition from the metadb and
4591 * zeros out incore version.
4592 */
4593 if (metaioctl(MD_IOCRESET, &reset_params,
4594 &reset_params.mde, np->cname) != 0) {
4595 (void) mdstealerror(ep, &reset_params.mde);
4596 goto out;
4597 }
4598
4599 /*
4600 * Wait for the /dev to be cleaned up. Ignore the return
4601 * value since there's not much we can do.
4602 */
4603 (void) meta_update_devtree(meta_getminor(np->dev));
4604
4605 rval = 0; /* success */
4606
4607 if (options & MDCMD_PRINT) {
4608 (void) printf(dgettext(TEXT_DOMAIN,
4609 "%s: Soft Partition is cleared\n"),
4610 np->cname);
4611 (void) fflush(stdout);
4612 }
4613
4614 /*
4615 * if told to recurse and on a metadevice, then attempt to
4616 * clear the subdevices. Indicate failure if the clear fails.
4617 */
4618 if ((options & MDCMD_RECURSE) &&
4619 (metaismeta(msp->compnamep)) &&
4620 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0))
4621 rval = -1;
4622
4623 out:
4624 meta_invalidate_name(np);
4625 return (rval);
4626 }
4627
4628 /*
4629 * FUNCTION: meta_sp_reset()
4630 * INPUT: sp - the set name of the device to reset
4631 * np - the name of the device to reset
4632 * options - metaclear options
4633 * OUTPUT: ep - return error pointer
4634 * RETURNS: int - 0 success, -1 error
4635 * PURPOSE: provides the entry point to the rest of libmeta for deleting a
4636 * soft partition. If np is NULL, then soft partitions are
4637 * all deleted at the current level and then recursively deleted.
4638 * Otherwise, if a name is specified either directly or as a
4639 * result of a recursive operation, it deletes only that name.
4640 * Since something sitting under a soft partition may be parented
4641 * to it, we have to reparent that other device to another soft
4642 * partition on the same component if we're deleting the one it's
4643 * parented to.
4644 */
4645 int
meta_sp_reset(mdsetname_t * sp,mdname_t * np,mdcmdopts_t options,md_error_t * ep)4646 meta_sp_reset(
4647 mdsetname_t *sp,
4648 mdname_t *np,
4649 mdcmdopts_t options,
4650 md_error_t *ep
4651 )
4652 {
4653 md_sp_t *msp;
4654 int rval = -1;
4655 mdnamelist_t *spnlp = NULL, *nlp = NULL;
4656 md_sp_reset_t reset_params;
4657 int num_sp;
4658
4659 assert(sp != NULL);
4660
4661 /* reset/delete all soft paritions */
4662 if (np == NULL) {
4663 /*
4664 * meta_reset_all sets MDCMD_RECURSE, but this behavior
4665 * is incorrect for soft partitions. We want to clear
4666 * all soft partitions at a particular level in the
4667 * metadevice stack before moving to the next level.
4668 * Thus, we clear MDCMD_RECURSE from the options.
4669 */
4670 options &= ~MDCMD_RECURSE;
4671
4672 /* for each soft partition */
4673 rval = 0;
4674 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
4675 rval = -1;
4676
4677 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) {
4678 np = nlp->namep;
4679 if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
4680 rval = -1;
4681 break;
4682 }
4683 /*
4684 * meta_reset_all calls us twice to get soft
4685 * partitions at the top and bottom of the stack.
4686 * thus, if we have a parent, we'll get deleted
4687 * on the next call.
4688 */
4689 if (MD_HAS_PARENT(msp->common.parent))
4690 continue;
4691 /*
4692 * If this is a multi-node set, we send a series
4693 * of individual metaclear commands.
4694 */
4695 if (meta_is_mn_set(sp, ep)) {
4696 if (meta_mn_send_metaclear_command(sp,
4697 np->cname, options, 0, ep) != 0) {
4698 rval = -1;
4699 break;
4700 }
4701 } else {
4702 if (meta_sp_reset(sp, np, options, ep) != 0) {
4703 rval = -1;
4704 break;
4705 }
4706 }
4707 }
4708 /* cleanup return status */
4709 metafreenamelist(spnlp);
4710 return (rval);
4711 }
4712
4713 /* check the name */
4714 if (metachkmeta(np, ep) != 0)
4715 return (-1);
4716
4717 /* get the unit structure */
4718 if ((msp = meta_get_sp(sp, np, ep)) == NULL)
4719 return (-1);
4720
4721 /* clear out reset parameters */
4722 (void) memset(&reset_params, 0, sizeof (reset_params));
4723
4724 /* if our child is a metadevice, we need to deparent/reparent it */
4725 if (metaismeta(msp->compnamep)) {
4726 /* get sp's on this component */
4727 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep,
4728 &spnlp, 1, ep)) <= 0)
4729 /* no sp's on this device. error! */
4730 return (-1);
4731 else if (num_sp == 1)
4732 /* last sp on this device, so we deparent */
4733 reset_params.new_parent = MD_NO_PARENT;
4734 else {
4735 /* have to reparent this metadevice */
4736 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
4737 if (meta_getminor(nlp->namep->dev) ==
4738 meta_getminor(np->dev))
4739 continue;
4740 /*
4741 * this isn't the softpart we are deleting,
4742 * so use this device as the new parent.
4743 */
4744 reset_params.new_parent =
4745 meta_getminor(nlp->namep->dev);
4746 break;
4747 }
4748 }
4749 metafreenamelist(spnlp);
4750 }
4751
4752 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0)
4753 return (-1);
4754
4755 return (0);
4756 }
4757
4758 /*
4759 * FUNCTION: meta_sp_reset_component()
4760 * INPUT: sp - the set name of the device to reset
4761 * name - the string name of the device to reset
4762 * options - metaclear options
4763 * OUTPUT: ep - return error pointer
4764 * RETURNS: int - 0 success, -1 error
4765 * PURPOSE: provides the ability to delete all soft partitions on a
4766 * specified device (metaclear -p). It first gets all of the
4767 * soft partitions on the component and then deletes each one
4768 * individually.
4769 */
4770 int
meta_sp_reset_component(mdsetname_t * sp,char * name,mdcmdopts_t options,md_error_t * ep)4771 meta_sp_reset_component(
4772 mdsetname_t *sp,
4773 char *name,
4774 mdcmdopts_t options,
4775 md_error_t *ep
4776 )
4777 {
4778 mdname_t *compnp, *np;
4779 mdnamelist_t *spnlp = NULL;
4780 mdnamelist_t *nlp = NULL;
4781 md_sp_t *msp;
4782 int count;
4783 md_sp_reset_t reset_params;
4784
4785 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL)
4786 return (-1);
4787
4788 /* If we're starting out with no soft partitions, it's an error */
4789 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
4790 if (count == 0)
4791 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname));
4792 else if (count < 0)
4793 return (-1);
4794
4795 /*
4796 * clear all soft partitions on this component.
4797 * NOTE: we reparent underlying metadevices as we go so that
4798 * things stay sane. Also, if we encounter an error, we stop
4799 * and go no further in case recovery might be needed.
4800 */
4801 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
4802 /* clear out reset parameters */
4803 (void) memset(&reset_params, 0, sizeof (reset_params));
4804
4805 /* check the name */
4806 np = nlp->namep;
4807
4808 if (metachkmeta(np, ep) != 0) {
4809 metafreenamelist(spnlp);
4810 return (-1);
4811 }
4812
4813 /* get the unit structure */
4814 if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
4815 metafreenamelist(spnlp);
4816 return (-1);
4817 }
4818
4819 /* have to deparent/reparent metadevices */
4820 if (metaismeta(compnp)) {
4821 if (nlp->next == NULL)
4822 reset_params.new_parent = MD_NO_PARENT;
4823 else
4824 reset_params.new_parent =
4825 meta_getminor(spnlp->next->namep->dev);
4826 }
4827
4828 /* clear soft partition */
4829 if (meta_sp_reset_common(sp, np, msp, reset_params,
4830 options, ep) < 0) {
4831 metafreenamelist(spnlp);
4832 return (-1);
4833 }
4834 }
4835 metafreenamelist(spnlp);
4836 return (0);
4837 }
4838
4839 /*
4840 * **************************************************************************
4841 * Grow (metattach) Functions *
4842 * **************************************************************************
4843 */
4844
4845 /*
4846 * FUNCTION: meta_sp_attach()
4847 * INPUT: sp - the set name of the device to attach to
4848 * np - the name of the device to attach to
4849 * addsize - the unparsed string holding the amount of space to add
4850 * options - metattach options
4851 * alignment - data alignment
4852 * OUTPUT: ep - return error pointer
4853 * RETURNS: int - 0 success, -1 error
4854 * PURPOSE: grows a soft partition by reading in the existing unit
4855 * structure and setting its state to Growing, allocating more
4856 * space (similar to meta_create_sp()), updating the watermarks,
4857 * and then writing out the new unit structure in the Okay state.
4858 */
4859 int
meta_sp_attach(mdsetname_t * sp,mdname_t * np,char * addsize,mdcmdopts_t options,sp_ext_length_t alignment,md_error_t * ep)4860 meta_sp_attach(
4861 mdsetname_t *sp,
4862 mdname_t *np,
4863 char *addsize,
4864 mdcmdopts_t options,
4865 sp_ext_length_t alignment,
4866 md_error_t *ep
4867 )
4868 {
4869 md_grow_params_t grow_params;
4870 sp_ext_length_t grow_len; /* amount to grow */
4871 mp_unit_t *mp, *new_un;
4872 mdname_t *compnp = NULL;
4873
4874 sp_ext_node_t *extlist = NULL;
4875 int numexts;
4876 mdnamelist_t *spnlp = NULL;
4877 int count;
4878 md_sp_t *msp;
4879 daddr_t start_block;
4880
4881 /* should have the same set */
4882 assert(sp != NULL);
4883 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
4884
4885 /* check name */
4886 if (metachkmeta(np, ep) != 0)
4887 return (-1);
4888
4889 if (meta_sp_parsesize(addsize, &grow_len) == -1) {
4890 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname));
4891 }
4892
4893 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
4894 return (-1);
4895
4896 /* make sure we don't have a parent */
4897 if (MD_HAS_PARENT(mp->c.un_parent)) {
4898 Free(mp);
4899 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname));
4900 }
4901
4902 if (getenv(META_SP_DEBUG)) {
4903 meta_sp_debug("meta_sp_attach: Unit structure before new "
4904 "space:\n");
4905 meta_sp_printunit(mp);
4906 }
4907
4908 /*
4909 * NOTE: the fast option to metakeyname is 0 as opposed to 1
4910 * If this was not the case we would suffer the following
4911 * assertion failure:
4912 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP
4913 * file meta_check.x, line 315
4914 * I guess this is because we have not "seen" this drive before
4915 * and hence hit the failure - this is of course the attach routine
4916 */
4917 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) {
4918 Free(mp);
4919 return (-1);
4920 }
4921
4922 /* metakeyname does not fill in the key. */
4923 compnp->key = mp->un_key;
4924
4925 /* work out the space on the component that we are dealing with */
4926 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
4927
4928 /*
4929 * see if the component has been soft partitioned yet, or if an
4930 * error occurred.
4931 */
4932 if (count == 0) {
4933 Free(mp);
4934 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname));
4935 } else if (count < 0) {
4936 Free(mp);
4937 return (-1);
4938 }
4939
4940 /*
4941 * seed extlist with reserved space at the beginning of the volume and
4942 * enough space for the end watermark. The end watermark always gets
4943 * updated, but if the underlying device changes size it may not be
4944 * pointed to until the extent before it is updated. Since the
4945 * end of the reserved space is where the first watermark starts,
4946 * the reserved extent should never be marked for updating.
4947 */
4948 if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
4949 MD_DISKADDR_ERROR) {
4950 Free(mp);
4951 return (-1);
4952 }
4953
4954 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
4955 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
4956 meta_sp_list_insert(NULL, NULL, &extlist,
4957 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
4958 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
4959
4960 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
4961 Free(mp);
4962 return (-1);
4963 }
4964
4965 metafreenamelist(spnlp);
4966
4967 if (getenv(META_SP_DEBUG)) {
4968 meta_sp_debug("meta_sp_attach: list of used extents:\n");
4969 meta_sp_list_dump(extlist);
4970 }
4971
4972 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
4973
4974 assert(mp->un_numexts >= 1);
4975 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len,
4976 mp->un_ext[mp->un_numexts - 1].un_poff,
4977 (alignment > 0) ? alignment :
4978 meta_sp_get_default_alignment(sp, compnp, ep));
4979
4980 if (numexts == -1) {
4981 Free(mp);
4982 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname));
4983 }
4984
4985 /* allocate new unit structure and copy in old unit */
4986 if ((new_un = meta_sp_updateunit(np, mp, extlist,
4987 grow_len, numexts, ep)) == NULL) {
4988 Free(mp);
4989 return (-1);
4990 }
4991 Free(mp);
4992
4993 /* If running in dryrun mode (-n option), we're done here */
4994 if ((options & MDCMD_DOIT) == 0) {
4995 if (options & MDCMD_PRINT) {
4996 (void) printf(dgettext(TEXT_DOMAIN,
4997 "%s: Soft Partition would grow\n"),
4998 np->cname);
4999 (void) fflush(stdout);
5000 }
5001 return (0);
5002 }
5003
5004 if (getenv(META_SP_DEBUG)) {
5005 meta_sp_debug("meta_sp_attach: updated unit structure:\n");
5006 meta_sp_printunit(new_un);
5007 }
5008
5009 assert(new_un != NULL);
5010
5011 (void) memset(&grow_params, 0, sizeof (grow_params));
5012 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
5013 grow_params.options = MD_CRO_64BIT;
5014 new_un->c.un_revision |= MD_64BIT_META_DEV;
5015 } else {
5016 grow_params.options = MD_CRO_32BIT;
5017 new_un->c.un_revision &= ~MD_64BIT_META_DEV;
5018 }
5019 grow_params.mnum = MD_SID(new_un);
5020 grow_params.size = new_un->c.un_size;
5021 grow_params.mdp = (uintptr_t)new_un;
5022 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum));
5023
5024 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde,
5025 np->cname) != 0) {
5026 (void) mdstealerror(ep, &grow_params.mde);
5027 return (-1);
5028 }
5029
5030 /* update all watermarks */
5031
5032 if ((msp = meta_get_sp(sp, np, ep)) == NULL)
5033 return (-1);
5034 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0)
5035 return (-1);
5036
5037
5038 /* second phase of commit, set status to MD_SP_OK */
5039 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0)
5040 return (-1);
5041
5042 meta_invalidate_name(np);
5043
5044 if (options & MDCMD_PRINT) {
5045 (void) printf(dgettext(TEXT_DOMAIN,
5046 "%s: Soft Partition has been grown\n"),
5047 np->cname);
5048 (void) fflush(stdout);
5049 }
5050
5051 return (0);
5052 }
5053
5054 /*
5055 * **************************************************************************
5056 * Recovery (metarecover) Functions *
5057 * **************************************************************************
5058 */
5059
5060 /*
5061 * FUNCTION: meta_recover_sp()
5062 * INPUT: sp - the name of the set we are recovering on
5063 * compnp - name pointer for device we are recovering on
5064 * argc - argument count
5065 * argv - left over arguments not parsed by metarecover command
5066 * options - metarecover options
5067 * OUTPUT: ep - return error pointer
5068 * RETURNS: int - 0 - success, -1 - error
5069 * PURPOSE: parse soft partitioning-specific metarecover options and
5070 * dispatch to the appropriate function to handle recovery.
5071 */
5072 int
meta_recover_sp(mdsetname_t * sp,mdname_t * compnp,int argc,char * argv[],mdcmdopts_t options,md_error_t * ep)5073 meta_recover_sp(
5074 mdsetname_t *sp,
5075 mdname_t *compnp,
5076 int argc,
5077 char *argv[],
5078 mdcmdopts_t options,
5079 md_error_t *ep
5080 )
5081 {
5082 md_set_desc *sd;
5083
5084 if (argc > 1) {
5085 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
5086 argc, argv);
5087 return (-1);
5088 }
5089
5090 /*
5091 * For a MN set, this operation must be performed on the master
5092 * as it is responsible for maintaining the watermarks
5093 */
5094 if (!metaislocalset(sp)) {
5095 if ((sd = metaget_setdesc(sp, ep)) == NULL)
5096 return (-1);
5097 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) {
5098 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno,
5099 sd->sd_mn_master_nodenm, NULL, NULL);
5100 return (-1);
5101 }
5102 }
5103 if (argc == 0) {
5104 /*
5105 * if no additional arguments are passed, metarecover should
5106 * validate both on-disk and metadb structures as well as
5107 * checking that both are consistent with each other
5108 */
5109 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
5110 return (-1);
5111 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
5112 return (-1);
5113 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0)
5114 return (-1);
5115 } else if (strcmp(argv[0], "-d") == 0) {
5116 /*
5117 * Ensure that there is no existing valid record for this
5118 * soft-partition. If there is we have nothing to do.
5119 */
5120 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0)
5121 return (-1);
5122 /* validate and recover from on-disk structures */
5123 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
5124 return (-1);
5125 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0)
5126 return (-1);
5127 } else if (strcmp(argv[0], "-m") == 0) {
5128 /* validate and recover from metadb structures */
5129 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
5130 return (-1);
5131 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0)
5132 return (-1);
5133 } else {
5134 /* syntax error */
5135 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
5136 argc, argv);
5137 return (-1);
5138 }
5139
5140 return (0);
5141 }
5142
5143 /*
5144 * FUNCTION: meta_sp_display_exthdr()
5145 * INPUT: none
5146 * OUTPUT: none
5147 * RETURNS: void
5148 * PURPOSE: print header line for sp_ext_node_t information. to be used
5149 * in conjunction with meta_sp_display_ext().
5150 */
5151 static void
meta_sp_display_exthdr(void)5152 meta_sp_display_exthdr(void)
5153 {
5154 (void) printf("%20s %5s %7s %20s %20s\n",
5155 dgettext(TEXT_DOMAIN, "Name"),
5156 dgettext(TEXT_DOMAIN, "Seq#"),
5157 dgettext(TEXT_DOMAIN, "Type"),
5158 dgettext(TEXT_DOMAIN, "Offset"),
5159 dgettext(TEXT_DOMAIN, "Length"));
5160 }
5161
5162
5163 /*
5164 * FUNCTION: meta_sp_display_ext()
5165 * INPUT: ext - extent to display
5166 * OUTPUT: none
5167 * RETURNS: void
5168 * PURPOSE: print selected fields from sp_ext_node_t.
5169 */
5170 static void
meta_sp_display_ext(sp_ext_node_t * ext)5171 meta_sp_display_ext(sp_ext_node_t *ext)
5172 {
5173 /* print extent information */
5174 if (ext->ext_namep != NULL)
5175 (void) printf("%20s ", ext->ext_namep->cname);
5176 else
5177 (void) printf("%20s ", "NONE");
5178
5179 (void) printf("%5u ", ext->ext_seq);
5180
5181 switch (ext->ext_type) {
5182 case EXTTYP_ALLOC:
5183 (void) printf("%7s ", "ALLOC");
5184 break;
5185 case EXTTYP_FREE:
5186 (void) printf("%7s ", "FREE");
5187 break;
5188 case EXTTYP_RESERVED:
5189 (void) printf("%7s ", "RESV");
5190 break;
5191 case EXTTYP_END:
5192 (void) printf("%7s ", "END");
5193 break;
5194 default:
5195 (void) printf("%7s ", "INVLD");
5196 break;
5197 }
5198
5199 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length);
5200 }
5201
5202
5203 /*
5204 * FUNCTION: meta_sp_checkseq()
5205 * INPUT: extlist - list of extents to be checked
5206 * OUTPUT: none
5207 * RETURNS: int - 0 - success, -1 - error
5208 * PURPOSE: check soft partition sequence numbers. this function assumes
5209 * that a list of extents representing 1 or more soft partitions
5210 * is passed in sorted in sequence number order. within a
5211 * single soft partition, there may not be any missing or
5212 * duplicate sequence numbers.
5213 */
5214 static int
meta_sp_checkseq(sp_ext_node_t * extlist)5215 meta_sp_checkseq(sp_ext_node_t *extlist)
5216 {
5217 sp_ext_node_t *ext;
5218
5219 assert(extlist != NULL);
5220
5221 for (ext = extlist;
5222 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC;
5223 ext = ext->ext_next) {
5224 if (ext->ext_next->ext_namep != NULL &&
5225 strcmp(ext->ext_next->ext_namep->cname,
5226 ext->ext_namep->cname) != 0)
5227 continue;
5228
5229 if (ext->ext_next->ext_seq != ext->ext_seq + 1) {
5230 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5231 "%s: sequence numbers are "
5232 "incorrect: %d should be %d\n"),
5233 ext->ext_next->ext_namep->cname,
5234 ext->ext_next->ext_seq, ext->ext_seq + 1);
5235 return (-1);
5236 }
5237 }
5238 return (0);
5239 }
5240
5241
5242 /*
5243 * FUNCTION: meta_sp_resolve_name_conflict()
5244 * INPUT: sp - name of set we're are recovering in.
5245 * old_np - name pointer of soft partition we found on disk.
5246 * OUTPUT: new_np - name pointer for new soft partition name.
5247 * ep - error pointer returned.
5248 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error
5249 * PURPOSE: Check to see if the name of one of the soft partitions we found
5250 * on disk already exists in the metadb. If so, prompt for a new
5251 * name. In addition, we keep a static array of names that
5252 * will be recovered from this device since these names don't
5253 * exist in the configuration at this point but cannot be
5254 * recovered more than once.
5255 */
5256 static int
meta_sp_resolve_name_conflict(mdsetname_t * sp,mdname_t * old_np,mdname_t ** new_np,md_error_t * ep)5257 meta_sp_resolve_name_conflict(
5258 mdsetname_t *sp,
5259 mdname_t *old_np,
5260 mdname_t **new_np,
5261 md_error_t *ep
5262 )
5263 {
5264 char yesno[255];
5265 char *yes;
5266 char newname[MD_SP_MAX_DEVNAME_PLUS_1];
5267 int nunits;
5268 static int *used_names = NULL;
5269
5270 assert(old_np != NULL);
5271
5272 if (used_names == NULL) {
5273 if ((nunits = meta_get_nunits(ep)) < 0)
5274 return (-1);
5275 used_names = Zalloc(nunits * sizeof (int));
5276 }
5277
5278 /* see if it exists already */
5279 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 &&
5280 metagetmiscname(old_np, ep) == NULL) {
5281 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
5282 return (-1);
5283 else {
5284 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1;
5285 mdclrerror(ep);
5286 return (0);
5287 }
5288 }
5289
5290 /* name exists, ask the user for a new one */
5291 (void) printf(dgettext(TEXT_DOMAIN,
5292 "WARNING: A soft partition named %s was found in the extent\n"
5293 "headers, but this name already exists in the metadb "
5294 "configuration.\n"
5295 "In order to continue recovery you must supply\n"
5296 "a new name for this soft partition.\n"), old_np->cname);
5297 (void) printf(dgettext(TEXT_DOMAIN,
5298 "Would you like to continue and supply a new name? (yes/no) "));
5299
5300 (void) fflush(stdout);
5301 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
5302 (strlen(yesno) == 1))
5303 (void) snprintf(yesno, sizeof (yesno), "%s\n",
5304 dgettext(TEXT_DOMAIN, "no"));
5305 yes = dgettext(TEXT_DOMAIN, "yes");
5306 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
5307 return (-1);
5308 }
5309
5310 (void) fflush(stdin);
5311
5312 /* get the new name */
5313 for (;;) {
5314 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name "
5315 "for this soft partition (dXXXX) "));
5316 (void) fflush(stdout);
5317 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL)
5318 (void) strcpy(newname, "");
5319
5320 /* remove newline character */
5321 if (newname[strlen(newname) - 1] == '\n')
5322 newname[strlen(newname) - 1] = '\0';
5323
5324 if (!(is_metaname(newname)) ||
5325 (meta_init_make_device(&sp, newname, ep) <= 0)) {
5326 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5327 "Invalid metadevice name\n"));
5328 (void) fflush(stderr);
5329 continue;
5330 }
5331
5332 if ((*new_np = metaname(&sp, newname,
5333 META_DEVICE, ep)) == NULL) {
5334 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5335 "Invalid metadevice name\n"));
5336 (void) fflush(stderr);
5337 continue;
5338 }
5339
5340 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits);
5341 /* make sure the name isn't already being used */
5342 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] ||
5343 metagetmiscname(*new_np, ep) != NULL) {
5344 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5345 "That name already exists\n"));
5346 continue;
5347 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
5348 return (-1);
5349
5350 break;
5351 }
5352
5353 /* got a new name, place in used array and return */
5354 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1;
5355 mdclrerror(ep);
5356 return (1);
5357 }
5358
5359 /*
5360 * FUNCTION: meta_sp_validate_wm()
5361 * INPUT: sp - set name we are recovering in
5362 * compnp - name pointer for device we are recovering from
5363 * options - metarecover options
5364 * OUTPUT: ep - error pointer returned
5365 * RETURNS: int - 0 - success, -1 - error
5366 * PURPOSE: validate and display watermark configuration. walk the
5367 * on-disk watermark structures and validate the information
5368 * found within. since a watermark configuration is
5369 * "self-defining", the act of traversing the watermarks
5370 * is part of the validation process.
5371 */
5372 static int
meta_sp_validate_wm(mdsetname_t * sp,mdname_t * compnp,mdcmdopts_t options,md_error_t * ep)5373 meta_sp_validate_wm(
5374 mdsetname_t *sp,
5375 mdname_t *compnp,
5376 mdcmdopts_t options,
5377 md_error_t *ep
5378 )
5379 {
5380 sp_ext_node_t *extlist = NULL;
5381 sp_ext_node_t *ext;
5382 int num_sps = 0;
5383 int rval;
5384
5385 if ((options & MDCMD_VERBOSE) != 0)
5386 (void) printf(dgettext(TEXT_DOMAIN,
5387 "Verifying on-disk structures on %s.\n"),
5388 compnp->cname);
5389
5390 /*
5391 * for each watermark, build an ext_node, place on list.
5392 */
5393 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist,
5394 meta_sp_cmp_by_nameseq, ep);
5395
5396 if ((options & MDCMD_VERBOSE) != 0) {
5397 /* print out what we found */
5398 if (extlist == NULL)
5399 (void) printf(dgettext(TEXT_DOMAIN,
5400 "No extent headers found on %s.\n"),
5401 compnp->cname);
5402 else {
5403 (void) printf(dgettext(TEXT_DOMAIN,
5404 "The following extent headers were found on %s.\n"),
5405 compnp->cname);
5406 meta_sp_display_exthdr();
5407 }
5408 for (ext = extlist; ext != NULL; ext = ext->ext_next)
5409 meta_sp_display_ext(ext);
5410 }
5411
5412 if (rval < 0) {
5413 (void) printf(dgettext(TEXT_DOMAIN,
5414 "%s: On-disk structures invalid or "
5415 "no soft partitions found.\n"),
5416 compnp->cname);
5417 return (-1);
5418 }
5419
5420 assert(extlist != NULL);
5421
5422 /* count number of soft partitions */
5423 for (ext = extlist;
5424 ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5425 ext = ext->ext_next) {
5426 if (ext->ext_next != NULL &&
5427 ext->ext_next->ext_namep != NULL &&
5428 strcmp(ext->ext_next->ext_namep->cname,
5429 ext->ext_namep->cname) == 0)
5430 continue;
5431 num_sps++;
5432 }
5433
5434 if ((options & MDCMD_VERBOSE) != 0)
5435 (void) printf(dgettext(TEXT_DOMAIN,
5436 "Found %d soft partition(s) on %s.\n"), num_sps,
5437 compnp->cname);
5438
5439 if (num_sps == 0) {
5440 (void) printf(dgettext(TEXT_DOMAIN,
5441 "%s: No soft partitions.\n"), compnp->cname);
5442 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5443 }
5444
5445 /* check sequence numbers */
5446 if ((options & MDCMD_VERBOSE) != 0)
5447 (void) printf(dgettext(TEXT_DOMAIN,
5448 "Checking sequence numbers.\n"));
5449
5450 if (meta_sp_checkseq(extlist) != 0)
5451 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5452
5453 return (0);
5454 }
5455
5456 /*
5457 * FUNCTION: meta_sp_validate_unit()
5458 * INPUT: sp - name of set we are recovering in
5459 * compnp - name of component we are recovering from
5460 * options - metarecover options
5461 * OUTPUT: ep - error pointer returned
5462 * RETURNS: int - 0 - success, -1 - error
5463 * PURPOSE: validate and display metadb configuration. begin by getting
5464 * all soft partitions built on the specified component. get
5465 * the unit structure for each one and validate the fields within.
5466 */
5467 static int
meta_sp_validate_unit(mdsetname_t * sp,mdname_t * compnp,mdcmdopts_t options,md_error_t * ep)5468 meta_sp_validate_unit(
5469 mdsetname_t *sp,
5470 mdname_t *compnp,
5471 mdcmdopts_t options,
5472 md_error_t *ep
5473 )
5474 {
5475 md_sp_t *msp;
5476 mdnamelist_t *spnlp = NULL;
5477 mdnamelist_t *namep = NULL;
5478 int count;
5479 uint_t extn;
5480 sp_ext_length_t size;
5481
5482 if ((options & MDCMD_VERBOSE) != 0)
5483 (void) printf(dgettext(TEXT_DOMAIN,
5484 "%s: Validating soft partition metadb entries.\n"),
5485 compnp->cname);
5486
5487 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
5488 return (-1);
5489
5490 /* get all soft partitions on component */
5491 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
5492
5493 if (count == 0) {
5494 (void) printf(dgettext(TEXT_DOMAIN,
5495 "%s: No soft partitions.\n"), compnp->cname);
5496 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5497 } else if (count < 0) {
5498 return (-1);
5499 }
5500
5501 /* Now go through the soft partitions and check each one */
5502 for (namep = spnlp; namep != NULL; namep = namep->next) {
5503 mdname_t *curnp = namep->namep;
5504 sp_ext_offset_t curvoff;
5505
5506 /* get the unit structure */
5507 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
5508 return (-1);
5509
5510 /* verify generic unit structure parameters */
5511 if ((options & MDCMD_VERBOSE) != 0)
5512 (void) printf(dgettext(TEXT_DOMAIN,
5513 "\nVerifying device %s.\n"),
5514 curnp->cname);
5515
5516 /*
5517 * MD_SP_LAST is an invalid state and is always the
5518 * highest numbered.
5519 */
5520 if (msp->status >= MD_SP_LAST) {
5521 (void) printf(dgettext(TEXT_DOMAIN,
5522 "%s: status value %u is out of range.\n"),
5523 curnp->cname, msp->status);
5524 return (mdmderror(ep, MDE_RECOVER_FAILED,
5525 0, curnp->cname));
5526 } else if ((options & MDCMD_VERBOSE) != 0) {
5527 uint_t tstate = 0;
5528
5529 if (metaismeta(msp->compnamep)) {
5530 if (meta_get_tstate(msp->common.namep->dev,
5531 &tstate, ep) != 0)
5532 return (-1);
5533 }
5534 (void) printf(dgettext(TEXT_DOMAIN,
5535 "%s: Status \"%s\" is valid.\n"),
5536 curnp->cname, meta_sp_status_to_name(msp->status,
5537 tstate & MD_DEV_ERRORED));
5538 }
5539
5540 /* Now verify each extent */
5541 if ((options & MDCMD_VERBOSE) != 0)
5542 (void) printf("%14s %21s %21s %21s\n",
5543 dgettext(TEXT_DOMAIN, "Extent Number"),
5544 dgettext(TEXT_DOMAIN, "Virtual Offset"),
5545 dgettext(TEXT_DOMAIN, "Physical Offset"),
5546 dgettext(TEXT_DOMAIN, "Length"));
5547
5548 curvoff = 0ULL;
5549 for (extn = 0; extn < msp->ext.ext_len; extn++) {
5550 md_sp_ext_t *extp = &msp->ext.ext_val[extn];
5551
5552 if ((options & MDCMD_VERBOSE) != 0)
5553 (void) printf("%14u %21llu %21llu %21llu\n",
5554 extn, extp->voff, extp->poff, extp->len);
5555
5556 if (extp->voff != curvoff) {
5557 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5558 "%s: virtual offset for extent %u "
5559 "is inconsistent, expected %llu, "
5560 "got %llu.\n"), curnp->cname, extn,
5561 curvoff, extp->voff);
5562 return (mdmderror(ep, MDE_RECOVER_FAILED,
5563 0, compnp->cname));
5564 }
5565
5566 /* make sure extent does not drop off the end */
5567 if ((extp->poff + extp->len) == size) {
5568 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5569 "%s: extent %u at offset %llu, "
5570 "length %llu exceeds the size of the "
5571 "device, %llu.\n"), curnp->cname,
5572 extn, extp->poff, extp->len, size);
5573 return (mdmderror(ep, MDE_RECOVER_FAILED,
5574 0, compnp->cname));
5575 }
5576
5577 curvoff += extp->len;
5578 }
5579 }
5580 if (options & MDCMD_PRINT) {
5581 (void) printf(dgettext(TEXT_DOMAIN,
5582 "%s: Soft Partition metadb configuration is valid\n"),
5583 compnp->cname);
5584 }
5585 return (0);
5586 }
5587
5588 /*
5589 * FUNCTION: meta_sp_validate_wm_and_unit()
5590 * INPUT: sp - name of set we are recovering in
5591 * compnp - name of device we are recovering from
5592 * options - metarecover options
5593 * OUTPUT: ep - error pointer returned
5594 * RETURNS: int - 0 - success, -1 error
5595 * PURPOSE: cross-validate and display watermarks and metadb records.
5596 * get both the unit structures for the soft partitions built
5597 * on the specified component and the watermarks found on that
5598 * component and check to make sure they are consistent with
5599 * each other.
5600 */
5601 static int
meta_sp_validate_wm_and_unit(mdsetname_t * sp,mdname_t * np,mdcmdopts_t options,md_error_t * ep)5602 meta_sp_validate_wm_and_unit(
5603 mdsetname_t *sp,
5604 mdname_t *np,
5605 mdcmdopts_t options,
5606 md_error_t *ep
5607 )
5608 {
5609 sp_ext_node_t *wmlist = NULL;
5610 sp_ext_node_t *unitlist = NULL;
5611 sp_ext_node_t *unitext;
5612 sp_ext_node_t *wmext;
5613 sp_ext_offset_t tmpunitoff;
5614 mdnamelist_t *spnlp = NULL;
5615 int count;
5616 int rval = 0;
5617 int verbose = (options & MDCMD_VERBOSE);
5618
5619 /* get unit structure list */
5620 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep);
5621 if (count <= 0)
5622 return (-1);
5623
5624 meta_sp_list_insert(NULL, NULL, &unitlist,
5625 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
5626 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
5627
5628 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) {
5629 metafreenamelist(spnlp);
5630 return (-1);
5631 }
5632
5633 metafreenamelist(spnlp);
5634
5635 meta_sp_list_freefill(&unitlist, metagetsize(np, ep));
5636
5637 if (meta_sp_extlist_from_wm(sp, np, &wmlist,
5638 meta_sp_cmp_by_offset, ep) < 0) {
5639 meta_sp_list_free(&unitlist);
5640 return (-1);
5641 }
5642
5643 if (getenv(META_SP_DEBUG)) {
5644 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n");
5645 meta_sp_list_dump(unitlist);
5646 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n");
5647 meta_sp_list_dump(wmlist);
5648 }
5649
5650 /*
5651 * step through both lists and compare allocated nodes. Free
5652 * nodes and end watermarks may differ between the two but
5653 * that's generally ok, and if they're wrong will typically
5654 * cause misplaced allocated extents.
5655 */
5656 if (verbose)
5657 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb "
5658 "allocations match extent headers.\n"), np->cname);
5659
5660 unitext = unitlist;
5661 wmext = wmlist;
5662 while ((wmext != NULL) && (unitext != NULL)) {
5663 /* find next allocated extents in each list */
5664 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC)
5665 wmext = wmext->ext_next;
5666
5667 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC)
5668 unitext = unitext->ext_next;
5669
5670 if (wmext == NULL || unitext == NULL)
5671 break;
5672
5673 if (verbose) {
5674 (void) printf(dgettext(TEXT_DOMAIN,
5675 "Metadb extent:\n"));
5676 meta_sp_display_exthdr();
5677 meta_sp_display_ext(unitext);
5678 (void) printf(dgettext(TEXT_DOMAIN,
5679 "Extent header extent:\n"));
5680 meta_sp_display_exthdr();
5681 meta_sp_display_ext(wmext);
5682 (void) printf("\n");
5683 }
5684
5685 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0)
5686 rval = -1;
5687
5688 /*
5689 * if the offsets aren't equal, only increment the
5690 * lowest one in hopes of getting the lists back in sync.
5691 */
5692 tmpunitoff = unitext->ext_offset;
5693 if (unitext->ext_offset <= wmext->ext_offset)
5694 unitext = unitext->ext_next;
5695 if (wmext->ext_offset <= tmpunitoff)
5696 wmext = wmext->ext_next;
5697 }
5698
5699 /*
5700 * if both lists aren't at the end then there are extra
5701 * allocated nodes in one of them.
5702 */
5703 if (wmext != NULL) {
5704 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5705 "%s: extent headers contain allocations not in "
5706 "the metadb\n\n"), np->cname);
5707 rval = -1;
5708 }
5709
5710 if (unitext != NULL) {
5711 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5712 "%s: metadb contains allocations not in the extent "
5713 "headers\n\n"), np->cname);
5714 rval = -1;
5715 }
5716
5717 if (options & MDCMD_PRINT) {
5718 if (rval == 0) {
5719 (void) printf(dgettext(TEXT_DOMAIN,
5720 "%s: Soft Partition metadb matches extent "
5721 "header configuration\n"), np->cname);
5722 } else {
5723 (void) printf(dgettext(TEXT_DOMAIN,
5724 "%s: Soft Partition metadb does not match extent "
5725 "header configuration\n"), np->cname);
5726 }
5727 }
5728
5729 return (rval);
5730 }
5731
5732 /*
5733 * FUNCTION: meta_sp_validate_exts()
5734 * INPUT: compnp - name pointer for device we are recovering from
5735 * wmext - extent node representing watermark
5736 * unitext - extent node from unit structure
5737 * OUTPUT: ep - return error pointer
5738 * RETURNS: int - 0 - succes, mdmderror return code - error
5739 * PURPOSE: Takes two extent nodes and checks them against each other.
5740 * offset, length, sequence number, set, and name are compared.
5741 */
5742 static int
meta_sp_validate_exts(mdname_t * compnp,sp_ext_node_t * wmext,sp_ext_node_t * unitext,md_error_t * ep)5743 meta_sp_validate_exts(
5744 mdname_t *compnp,
5745 sp_ext_node_t *wmext,
5746 sp_ext_node_t *unitext,
5747 md_error_t *ep
5748 )
5749 {
5750 if (wmext->ext_offset != unitext->ext_offset) {
5751 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5752 "%s: unit structure and extent header offsets differ.\n"),
5753 compnp->cname);
5754 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5755 }
5756
5757 if (wmext->ext_length != unitext->ext_length) {
5758 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5759 "%s: unit structure and extent header lengths differ.\n"),
5760 compnp->cname);
5761 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5762 }
5763
5764 if (wmext->ext_seq != unitext->ext_seq) {
5765 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5766 "%s: unit structure and extent header sequence numbers "
5767 "differ.\n"), compnp->cname);
5768 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5769 }
5770
5771 if (wmext->ext_type != unitext->ext_type) {
5772 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5773 "%s: unit structure and extent header types differ.\n"),
5774 compnp->cname);
5775 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5776 }
5777
5778 /*
5779 * If one has a set pointer and the other doesn't, error.
5780 * If both extents have setnames, then make sure they match
5781 * If both are NULL, it's ok, they match.
5782 */
5783 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) {
5784 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5785 "%s: unit structure and extent header set values "
5786 "differ.\n"), compnp->cname);
5787 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5788 }
5789
5790 if (unitext->ext_setp != NULL) {
5791 if (strcmp(unitext->ext_setp->setname,
5792 wmext->ext_setp->setname) != 0) {
5793 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5794 "%s: unit structure and extent header set names "
5795 "differ.\n"), compnp->cname);
5796 return (mdmderror(ep, MDE_RECOVER_FAILED,
5797 0, compnp->cname));
5798 }
5799 }
5800
5801 /*
5802 * If one has a name pointer and the other doesn't, error.
5803 * If both extents have names, then make sure they match
5804 * If both are NULL, it's ok, they match.
5805 */
5806 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) {
5807 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5808 "%s: unit structure and extent header name values "
5809 "differ.\n"), compnp->cname);
5810 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5811 }
5812
5813 if (unitext->ext_namep != NULL) {
5814 if (strcmp(wmext->ext_namep->cname,
5815 unitext->ext_namep->cname) != 0) {
5816 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5817 "%s: unit structure and extent header names "
5818 "differ.\n"), compnp->cname);
5819 return (mdmderror(ep, MDE_RECOVER_FAILED,
5820 0, compnp->cname));
5821 }
5822 }
5823
5824 return (0);
5825 }
5826
5827 /*
5828 * FUNCTION: update_sp_status()
5829 * INPUT: sp - name of set we are recovering in
5830 * minors - pointer to an array of soft partition minor numbers
5831 * num_sps - number of minor numbers in array
5832 * status - new status to be applied to all soft parts in array
5833 * mn_set - set if current set is a multi-node set
5834 * OUTPUT: ep - return error pointer
5835 * RETURNS: int - 0 - success, -1 - error
5836 * PURPOSE: update status of soft partitions to new status. minors is an
5837 * array of minor numbers to apply the new status to.
5838 * If mn_set is set, a message is sent to all nodes in the
5839 * cluster to update the status locally.
5840 */
5841 static int
update_sp_status(mdsetname_t * sp,minor_t * minors,int num_sps,sp_status_t status,bool_t mn_set,md_error_t * ep)5842 update_sp_status(
5843 mdsetname_t *sp,
5844 minor_t *minors,
5845 int num_sps,
5846 sp_status_t status,
5847 bool_t mn_set,
5848 md_error_t *ep
5849 )
5850 {
5851 int i;
5852 int err = 0;
5853
5854 if (mn_set) {
5855 md_mn_msg_sp_setstat_t sp_setstat_params;
5856 int result;
5857 md_mn_result_t *resp = NULL;
5858
5859 for (i = 0; i < num_sps; i++) {
5860 sp_setstat_params.sp_setstat_mnum = minors[i];
5861 sp_setstat_params.sp_setstat_status = status;
5862
5863 result = mdmn_send_message(sp->setno,
5864 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0,
5865 (char *)&sp_setstat_params,
5866 sizeof (sp_setstat_params),
5867 &resp, ep);
5868 if (resp != NULL) {
5869 if (resp->mmr_exitval != 0)
5870 err = -1;
5871 free_result(resp);
5872 }
5873 if (result != 0) {
5874 err = -1;
5875 }
5876 }
5877 } else {
5878 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0)
5879 err = -1;
5880 }
5881 if (err < 0) {
5882 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5883 "Error updating status on recovered soft "
5884 "partitions.\n"));
5885 }
5886 return (err);
5887 }
5888
5889 /*
5890 * FUNCTION: meta_sp_recover_from_wm()
5891 * INPUT: sp - name of set we are recovering in
5892 * compnp - name pointer for component we are recovering from
5893 * options - metarecover options
5894 * OUTPUT: ep - return error pointer
5895 * RETURNS: int - 0 - success, -1 - error
5896 * PURPOSE: update metadb records to match watermarks. begin by getting
5897 * an extlist representing all soft partitions on the component.
5898 * then build a unit structure for each soft partition.
5899 * notify user of changes, then commit each soft partition to
5900 * the metadb one at a time in the "recovering" state. update
5901 * any watermarks that may need it (to reflect possible name
5902 * changes), and, finally, set the status of all recovered
5903 * partitions to the "OK" state at once.
5904 */
5905 static int
meta_sp_recover_from_wm(mdsetname_t * sp,mdname_t * compnp,mdcmdopts_t options,md_error_t * ep)5906 meta_sp_recover_from_wm(
5907 mdsetname_t *sp,
5908 mdname_t *compnp,
5909 mdcmdopts_t options,
5910 md_error_t *ep
5911 )
5912 {
5913 sp_ext_node_t *extlist = NULL;
5914 sp_ext_node_t *sp_list = NULL;
5915 sp_ext_node_t *update_list = NULL;
5916 sp_ext_node_t *ext;
5917 sp_ext_node_t *sp_ext;
5918 mp_unit_t *mp;
5919 mp_unit_t **un_array;
5920 int numexts = 0, num_sps = 0, i = 0;
5921 int err = 0;
5922 int not_recovered = 0;
5923 int committed = 0;
5924 sp_ext_length_t sp_length = 0LL;
5925 mdnamelist_t *keynlp = NULL;
5926 mdname_t *np;
5927 mdname_t *new_np;
5928 int new_name;
5929 md_set_params_t set_params;
5930 minor_t *minors = NULL;
5931 char yesno[255];
5932 char *yes;
5933 bool_t mn_set = 0;
5934 md_set_desc *sd;
5935 mm_unit_t *mm;
5936 md_set_mmown_params_t *ownpar = NULL;
5937 int comp_is_mirror = 0;
5938
5939 /*
5940 * if this component appears in another metadevice already, do
5941 * NOT recover from it.
5942 */
5943 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0)
5944 return (-1);
5945
5946 /* set flag if dealing with a MN set */
5947 if (!metaislocalset(sp)) {
5948 if ((sd = metaget_setdesc(sp, ep)) == NULL) {
5949 return (-1);
5950 }
5951 if (MD_MNSET_DESC(sd))
5952 mn_set = 1;
5953 }
5954 /*
5955 * for each watermark, build an ext_node, place on list.
5956 */
5957 if (meta_sp_extlist_from_wm(sp, compnp, &extlist,
5958 meta_sp_cmp_by_nameseq, ep) < 0)
5959 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5960
5961 assert(extlist != NULL);
5962
5963 /* count number of soft partitions */
5964 for (ext = extlist;
5965 ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5966 ext = ext->ext_next) {
5967 if (ext->ext_next != NULL &&
5968 ext->ext_next->ext_namep != NULL &&
5969 strcmp(ext->ext_next->ext_namep->cname,
5970 ext->ext_namep->cname) == 0)
5971 continue;
5972 num_sps++;
5973 }
5974
5975 /* allocate array of unit structure pointers */
5976 un_array = Zalloc(num_sps * sizeof (mp_unit_t *));
5977
5978 /*
5979 * build unit structures from list of ext_nodes.
5980 */
5981 for (ext = extlist;
5982 ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5983 ext = ext->ext_next) {
5984 meta_sp_list_insert(ext->ext_setp, ext->ext_namep,
5985 &sp_list, ext->ext_offset, ext->ext_length,
5986 ext->ext_type, ext->ext_seq, ext->ext_flags,
5987 meta_sp_cmp_by_nameseq);
5988
5989 numexts++;
5990 sp_length += ext->ext_length - MD_SP_WMSIZE;
5991
5992 if (ext->ext_next != NULL &&
5993 ext->ext_next->ext_namep != NULL &&
5994 strcmp(ext->ext_next->ext_namep->cname,
5995 ext->ext_namep->cname) == 0)
5996 continue;
5997
5998 /*
5999 * if we made it here, we are at a soft partition
6000 * boundary in the list.
6001 */
6002 if (getenv(META_SP_DEBUG)) {
6003 meta_sp_debug("meta_recover_from_wm: dumping wm "
6004 "list:\n");
6005 meta_sp_list_dump(sp_list);
6006 }
6007
6008 assert(sp_list != NULL);
6009 assert(sp_list->ext_namep != NULL);
6010
6011 if ((new_name = meta_sp_resolve_name_conflict(sp,
6012 sp_list->ext_namep, &new_np, ep)) < 0) {
6013 err = 1;
6014 goto out;
6015 } else if (new_name) {
6016 for (sp_ext = sp_list;
6017 sp_ext != NULL;
6018 sp_ext = sp_ext->ext_next) {
6019 /*
6020 * insert into the update list for
6021 * watermark update.
6022 */
6023 meta_sp_list_insert(sp_ext->ext_setp,
6024 new_np, &update_list, sp_ext->ext_offset,
6025 sp_ext->ext_length, sp_ext->ext_type,
6026 sp_ext->ext_seq, EXTFLG_UPDATE,
6027 meta_sp_cmp_by_offset);
6028 }
6029
6030 }
6031 if (options & MDCMD_DOIT) {
6032 /* store name in namespace */
6033 if (mn_set) {
6034 /* send message to all nodes to return key */
6035 md_mn_msg_addkeyname_t *send_params;
6036 int result;
6037 md_mn_result_t *resp = NULL;
6038 int message_size;
6039
6040 message_size = sizeof (*send_params) +
6041 strlen(compnp->cname) + 1;
6042 send_params = Zalloc(message_size);
6043 send_params->addkeyname_setno = sp->setno;
6044 (void) strcpy(&send_params->addkeyname_name[0],
6045 compnp->cname);
6046 result = mdmn_send_message(sp->setno,
6047 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS,
6048 0, (char *)send_params, message_size, &resp,
6049 ep);
6050 Free(send_params);
6051 if (resp != NULL) {
6052 if (resp->mmr_exitval >= 0) {
6053 compnp->key =
6054 (mdkey_t)resp->mmr_exitval;
6055 } else {
6056 err = 1;
6057 free_result(resp);
6058 goto out;
6059 }
6060 free_result(resp);
6061 }
6062 if (result != 0) {
6063 err = 1;
6064 goto out;
6065 }
6066 (void) metanamelist_append(&keynlp, compnp);
6067 } else {
6068 if (add_key_name(sp, compnp, &keynlp,
6069 ep) != 0) {
6070 err = 1;
6071 goto out;
6072 }
6073 }
6074 }
6075
6076 /* create the unit structure */
6077 if ((mp = meta_sp_createunit(
6078 (new_name) ? new_np : sp_list->ext_namep, compnp,
6079 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) {
6080 err = 1;
6081 goto out;
6082 }
6083
6084 if (getenv(META_SP_DEBUG)) {
6085 meta_sp_debug("meta_sp_recover_from_wm: "
6086 "printing newly created unit structure");
6087 meta_sp_printunit(mp);
6088 }
6089
6090 /* place in unit structure array */
6091 un_array[i++] = mp;
6092
6093 /* free sp_list */
6094 meta_sp_list_free(&sp_list);
6095 sp_list = NULL;
6096 numexts = 0;
6097 sp_length = 0LL;
6098 }
6099
6100 /* display configuration updates */
6101 (void) printf(dgettext(TEXT_DOMAIN,
6102 "The following soft partitions were found and will be added to\n"
6103 "your metadevice configuration.\n"));
6104 (void) printf("%5s %15s %18s\n",
6105 dgettext(TEXT_DOMAIN, "Name"),
6106 dgettext(TEXT_DOMAIN, "Size"),
6107 dgettext(TEXT_DOMAIN, "No. of Extents"));
6108 for (i = 0; i < num_sps; i++) {
6109 (void) printf("%5s%lu %15llu %9d\n", "d",
6110 MD_MIN2UNIT(MD_SID(un_array[i])),
6111 un_array[i]->un_length, un_array[i]->un_numexts);
6112 }
6113
6114 if (!(options & MDCMD_DOIT)) {
6115 not_recovered = 1;
6116 goto out;
6117 }
6118
6119 /* ask user for confirmation */
6120 (void) printf(dgettext(TEXT_DOMAIN,
6121 "WARNING: You are about to add one or more soft partition\n"
6122 "metadevices to your metadevice configuration. If there\n"
6123 "appears to be an error in the soft partition(s) displayed\n"
6124 "above, do NOT proceed with this recovery operation.\n"));
6125 (void) printf(dgettext(TEXT_DOMAIN,
6126 "Are you sure you want to do this (yes/no)? "));
6127
6128 (void) fflush(stdout);
6129 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
6130 (strlen(yesno) == 1))
6131 (void) snprintf(yesno, sizeof (yesno), "%s\n",
6132 dgettext(TEXT_DOMAIN, "no"));
6133 yes = dgettext(TEXT_DOMAIN, "yes");
6134 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
6135 not_recovered = 1;
6136 goto out;
6137 }
6138
6139 /* commit records one at a time */
6140 for (i = 0; i < num_sps; i++) {
6141 (void) memset(&set_params, 0, sizeof (set_params));
6142 set_params.mnum = MD_SID(un_array[i]);
6143 set_params.size = (un_array[i])->c.un_size;
6144 set_params.mdp = (uintptr_t)(un_array[i]);
6145 set_params.options =
6146 meta_check_devicesize(un_array[i]->un_length);
6147 if (set_params.options == MD_CRO_64BIT) {
6148 un_array[i]->c.un_revision |= MD_64BIT_META_DEV;
6149 } else {
6150 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV;
6151 }
6152 MD_SETDRIVERNAME(&set_params, MD_SP,
6153 MD_MIN2SET(set_params.mnum));
6154
6155 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep);
6156
6157 /*
6158 * If this is an MN set, send the MD_IOCSET ioctl to all nodes
6159 */
6160 if (mn_set) {
6161 md_mn_msg_iocset_t send_params;
6162 int result;
6163 md_mn_result_t *resp = NULL;
6164 int mess_size;
6165
6166 /*
6167 * Calculate message size. md_mn_msg_iocset_t only
6168 * contains one extent, so increment the size to
6169 * include all extents
6170 */
6171 mess_size = sizeof (send_params) -
6172 sizeof (mp_ext_t) +
6173 (un_array[i]->un_numexts * sizeof (mp_ext_t));
6174
6175 send_params.iocset_params = set_params;
6176 (void) memcpy(&send_params.unit, un_array[i],
6177 sizeof (*un_array[i]) - sizeof (mp_ext_t) +
6178 (un_array[i]->un_numexts * sizeof (mp_ext_t)));
6179 result = mdmn_send_message(sp->setno,
6180 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0,
6181 (char *)&send_params, mess_size, &resp,
6182 ep);
6183 if (resp != NULL) {
6184 if (resp->mmr_exitval != 0)
6185 err = 1;
6186 free_result(resp);
6187 }
6188 if (result != 0) {
6189 err = 1;
6190 }
6191 } else {
6192 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
6193 np->cname) != 0) {
6194 err = 1;
6195 }
6196 }
6197
6198 if (err == 1) {
6199 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6200 "%s: Error committing record to metadb.\n"),
6201 np->cname);
6202 goto out;
6203 }
6204
6205 /* note that we've committed a record */
6206 if (!committed)
6207 committed = 1;
6208
6209 /* update any watermarks that need it */
6210 if (update_list != NULL) {
6211 md_sp_t *msp;
6212
6213 /*
6214 * Check to see if we're trying to create a partition
6215 * on a mirror. If so we may have to enforce an
6216 * ownership change before writing the watermark out.
6217 */
6218 if (metaismeta(compnp)) {
6219 char *miscname;
6220
6221 miscname = metagetmiscname(compnp, ep);
6222 if (miscname != NULL)
6223 comp_is_mirror = (strcmp(miscname,
6224 MD_MIRROR) == 0);
6225 else
6226 comp_is_mirror = 0;
6227 }
6228 /*
6229 * If this is a MN set and the component is a mirror,
6230 * change ownership to this node in order to write the
6231 * watermarks
6232 */
6233 if (mn_set && comp_is_mirror) {
6234 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
6235 if (mm == NULL) {
6236 err = 1;
6237 goto out;
6238 } else {
6239 err = meta_mn_change_owner(&ownpar,
6240 sp->setno,
6241 meta_getminor(compnp->dev),
6242 sd->sd_mn_mynode->nd_nodeid,
6243 MD_MN_MM_PREVENT_CHANGE |
6244 MD_MN_MM_SPAWN_THREAD);
6245 if (err != 0)
6246 goto out;
6247 }
6248 }
6249
6250 if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
6251 err = 1;
6252 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6253 "%s: Error updating extent headers.\n"),
6254 np->cname);
6255 goto out;
6256 }
6257 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) {
6258 err = 1;
6259 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6260 "%s: Error updating extent headers "
6261 "on disk.\n"), np->cname);
6262 goto out;
6263 }
6264 }
6265 /*
6266 * If we have changed ownership earlier and prevented any
6267 * ownership changes, we can now allow ownership changes
6268 * again.
6269 */
6270 if (ownpar) {
6271 (void) meta_mn_change_owner(&ownpar, sp->setno,
6272 ownpar->d.mnum,
6273 ownpar->d.owner,
6274 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
6275 }
6276 }
6277
6278 /* update status of all soft partitions to OK */
6279 minors = Zalloc(num_sps * sizeof (minor_t));
6280 for (i = 0; i < num_sps; i++)
6281 minors[i] = MD_SID(un_array[i]);
6282
6283 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep);
6284 if (err != 0)
6285 goto out;
6286
6287 if (options & MDCMD_PRINT)
6288 (void) printf(dgettext(TEXT_DOMAIN, "%s: "
6289 "Soft Partitions recovered from device.\n"),
6290 compnp->cname);
6291 out:
6292 /* free memory */
6293 if (extlist != NULL)
6294 meta_sp_list_free(&extlist);
6295 if (sp_list != NULL)
6296 meta_sp_list_free(&sp_list);
6297 if (update_list != NULL)
6298 meta_sp_list_free(&update_list);
6299 if (un_array != NULL) {
6300 for (i = 0; i < num_sps; i++)
6301 Free(un_array[i]);
6302 Free(un_array);
6303 }
6304 if (minors != NULL)
6305 Free(minors);
6306 if (ownpar != NULL)
6307 Free(ownpar);
6308 (void) fflush(stdout);
6309
6310 if ((keynlp != NULL) && (committed != 1)) {
6311 /*
6312 * if we haven't committed any softparts, either because of an
6313 * error or because the user decided not to proceed, delete
6314 * namelist key for the component
6315 */
6316 if (mn_set) {
6317 mdnamelist_t *p;
6318
6319 for (p = keynlp; (p != NULL); p = p->next) {
6320 mdname_t *np = p->namep;
6321 md_mn_msg_delkeyname_t send_params;
6322 md_mn_result_t *resp = NULL;
6323
6324 send_params.delkeyname_dev = np->dev;
6325 send_params.delkeyname_setno = sp->setno;
6326 send_params.delkeyname_key = np->key;
6327 (void) mdmn_send_message(sp->setno,
6328 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS,
6329 0, (char *)&send_params,
6330 sizeof (send_params),
6331 &resp, ep);
6332 if (resp != NULL) {
6333 free_result(resp);
6334 }
6335 }
6336 } else {
6337 (void) del_key_names(sp, keynlp, NULL);
6338 }
6339 }
6340
6341 metafreenamelist(keynlp);
6342
6343 if (err)
6344 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
6345
6346 if (not_recovered)
6347 if (options & MDCMD_PRINT)
6348 (void) printf(dgettext(TEXT_DOMAIN, "%s: "
6349 "Soft Partitions NOT recovered from device.\n"),
6350 compnp->cname);
6351 return (0);
6352 }
6353
6354 /*
6355 * FUNCTION: meta_sp_recover_from_unit()
6356 * INPUT: sp - name of set we are recovering in
6357 * compnp - name of component we are recovering from
6358 * options - metarecover options
6359 * OUTPUT: ep - return error pointer
6360 * RETURNS: int - 0 - success, -1 - error
6361 * PURPOSE: update watermarks to match metadb records. begin by getting
6362 * a namelist representing all soft partitions on the specified
6363 * component. then, build an extlist representing the soft
6364 * partitions, filling in the freespace extents. notify user
6365 * of changes, place all soft partitions into the "recovering"
6366 * state and update the watermarks. finally, return all soft
6367 * partitions to the "OK" state.
6368 */
6369 static int
meta_sp_recover_from_unit(mdsetname_t * sp,mdname_t * compnp,mdcmdopts_t options,md_error_t * ep)6370 meta_sp_recover_from_unit(
6371 mdsetname_t *sp,
6372 mdname_t *compnp,
6373 mdcmdopts_t options,
6374 md_error_t *ep
6375 )
6376 {
6377 mdnamelist_t *spnlp = NULL;
6378 mdnamelist_t *nlp = NULL;
6379 sp_ext_node_t *ext = NULL;
6380 sp_ext_node_t *extlist = NULL;
6381 int count;
6382 char yesno[255];
6383 char *yes;
6384 int rval = 0;
6385 minor_t *minors = NULL;
6386 int i;
6387 md_sp_t *msp;
6388 md_set_desc *sd;
6389 bool_t mn_set = 0;
6390 daddr_t start_block;
6391
6392 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
6393 if (count <= 0)
6394 return (-1);
6395
6396 /* set flag if dealing with a MN set */
6397 if (!metaislocalset(sp)) {
6398 if ((sd = metaget_setdesc(sp, ep)) == NULL) {
6399 return (-1);
6400 }
6401 if (MD_MNSET_DESC(sd))
6402 mn_set = 1;
6403 }
6404 /*
6405 * Save the XDR unit structure for one of the soft partitions;
6406 * we'll use this later to provide metadevice context to
6407 * update the watermarks so the device can be resolved by
6408 * devid instead of dev_t.
6409 */
6410 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) {
6411 metafreenamelist(spnlp);
6412 return (-1);
6413 }
6414
6415 if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
6416 MD_DISKADDR_ERROR) {
6417 return (-1);
6418 }
6419
6420 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
6421 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
6422 meta_sp_list_insert(NULL, NULL, &extlist,
6423 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
6424 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
6425
6426 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
6427 metafreenamelist(spnlp);
6428 return (-1);
6429 }
6430
6431 assert(extlist != NULL);
6432 if ((options & MDCMD_VERBOSE) != 0) {
6433 (void) printf(dgettext(TEXT_DOMAIN,
6434 "Updating extent headers on device %s from metadb.\n\n"),
6435 compnp->cname);
6436 (void) printf(dgettext(TEXT_DOMAIN,
6437 "The following extent headers will be written:\n"));
6438 meta_sp_display_exthdr();
6439 }
6440
6441 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
6442
6443 for (ext = extlist; ext != NULL; ext = ext->ext_next) {
6444
6445 /* mark every node for updating except the reserved space */
6446 if (ext->ext_type != EXTTYP_RESERVED) {
6447 ext->ext_flags |= EXTFLG_UPDATE;
6448
6449 /* print extent information */
6450 if ((options & MDCMD_VERBOSE) != 0)
6451 meta_sp_display_ext(ext);
6452 }
6453 }
6454
6455 /* request verification and then update all watermarks */
6456 if ((options & MDCMD_DOIT) != 0) {
6457
6458 (void) printf(dgettext(TEXT_DOMAIN,
6459 "\nWARNING: You are about to overwrite portions of %s\n"
6460 "with soft partition metadata. The extent headers will be\n"
6461 "written to match the existing metadb configuration. If\n"
6462 "the device was not previously setup with this\n"
6463 "configuration, data loss may result.\n\n"),
6464 compnp->cname);
6465 (void) printf(dgettext(TEXT_DOMAIN,
6466 "Are you sure you want to do this (yes/no)? "));
6467
6468 (void) fflush(stdout);
6469 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
6470 (strlen(yesno) == 1))
6471 (void) snprintf(yesno, sizeof (yesno),
6472 "%s\n", dgettext(TEXT_DOMAIN, "no"));
6473 yes = dgettext(TEXT_DOMAIN, "yes");
6474 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) {
6475 /* place soft partitions into recovering state */
6476 minors = Zalloc(count * sizeof (minor_t));
6477 for (nlp = spnlp, i = 0;
6478 nlp != NULL && i < count;
6479 nlp = nlp->next, i++) {
6480 assert(nlp->namep != NULL);
6481 minors[i] = meta_getminor(nlp->namep->dev);
6482 }
6483 if (update_sp_status(sp, minors, count,
6484 MD_SP_RECOVER, mn_set, ep) != 0) {
6485 rval = -1;
6486 goto out;
6487 }
6488
6489 /* update the watermarks */
6490 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
6491 rval = -1;
6492 goto out;
6493 }
6494
6495 if (options & MDCMD_PRINT) {
6496 (void) printf(dgettext(TEXT_DOMAIN, "%s: "
6497 "Soft Partitions recovered from metadb\n"),
6498 compnp->cname);
6499 }
6500
6501 /* return soft partitions to the OK state */
6502 if (update_sp_status(sp, minors, count,
6503 MD_SP_OK, mn_set, ep) != 0) {
6504 rval = -1;
6505 goto out;
6506 }
6507
6508 rval = 0;
6509 goto out;
6510 }
6511 }
6512
6513 if (options & MDCMD_PRINT) {
6514 (void) printf(dgettext(TEXT_DOMAIN,
6515 "%s: Soft Partitions NOT recovered from metadb\n"),
6516 compnp->cname);
6517 }
6518
6519 out:
6520 if (minors != NULL)
6521 Free(minors);
6522 metafreenamelist(spnlp);
6523 meta_sp_list_free(&extlist);
6524 (void) fflush(stdout);
6525 return (rval);
6526 }
6527
6528
6529 /*
6530 * FUNCTION: meta_sp_update_abr()
6531 * INPUT: sp - name of set we are recovering in
6532 * OUTPUT: ep - return error pointer
6533 * RETURNS: int - 0 - success, -1 - error
6534 * PURPOSE: update the ABR state for all soft partitions in the set. This
6535 * is called when joining a set. It sends a message to the master
6536 * node for each soft partition to get the value of tstate and
6537 * then sets ABR ,if required, by opening the sp, setting ABR
6538 * and then closing the sp. This approach is taken rather that
6539 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with
6540 * the case when we have another node simultaneously unsetting ABR.
6541 */
6542 int
meta_sp_update_abr(mdsetname_t * sp,md_error_t * ep)6543 meta_sp_update_abr(
6544 mdsetname_t *sp,
6545 md_error_t *ep
6546 )
6547 {
6548 mdnamelist_t *devnlp = NULL;
6549 mdnamelist_t *p;
6550 mdname_t *devnp = NULL;
6551 md_unit_t *un;
6552 char fname[MAXPATHLEN];
6553 int mnum, fd;
6554 volcap_t vc;
6555 uint_t tstate;
6556
6557
6558 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) {
6559 return (-1);
6560 }
6561
6562 /* Exit if no soft partitions in this set */
6563 if (devnlp == NULL)
6564 return (0);
6565
6566 /* For each soft partition */
6567 for (p = devnlp; (p != NULL); p = p->next) {
6568 devnp = p->namep;
6569
6570 /* check if this is a top level metadevice */
6571 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL)
6572 goto out;
6573 if (MD_HAS_PARENT(MD_PARENT(un))) {
6574 Free(un);
6575 continue;
6576 }
6577 Free(un);
6578
6579 /* Get tstate from Master */
6580 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) {
6581 mdname_t *np;
6582 np = metamnumname(&sp, meta_getminor(devnp->dev), 0,
6583 ep);
6584 if (np) {
6585 md_perror(dgettext(TEXT_DOMAIN,
6586 "Unable to get tstate for %s"), np->cname);
6587 }
6588 continue;
6589 }
6590 /* If not set on the master, nothing to do */
6591 if (!(tstate & MD_ABR_CAP))
6592 continue;
6593
6594 mnum = meta_getminor(devnp->dev);
6595 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u",
6596 sp->setname, (unsigned)MD_MIN2UNIT(mnum));
6597 if ((fd = open(fname, O_RDWR, 0)) < 0) {
6598 md_perror(dgettext(TEXT_DOMAIN,
6599 "Could not open device %s"), fname);
6600 continue;
6601 }
6602
6603 /* Set ABR state */
6604 vc.vc_info = 0;
6605 vc.vc_set = 0;
6606 if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) {
6607 (void) close(fd);
6608 continue;
6609 }
6610
6611 vc.vc_set = DKV_ABR_CAP;
6612 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) {
6613 (void) close(fd);
6614 goto out;
6615 }
6616
6617 (void) close(fd);
6618 }
6619 metafreenamelist(devnlp);
6620 return (0);
6621 out:
6622 metafreenamelist(devnlp);
6623 return (-1);
6624 }
6625
6626 /*
6627 * FUNCTION: meta_mn_sp_update_abr()
6628 * INPUT: arg - Given set.
6629 * PURPOSE: update the ABR state for all soft partitions in the set by
6630 * forking a process to call meta_sp_update_abr()
6631 * This function is only called via rpc.metad when adding a node
6632 * to a set, ie this node is beong joined to the set by another
6633 * node.
6634 */
6635 void *
meta_mn_sp_update_abr(void * arg)6636 meta_mn_sp_update_abr(void *arg)
6637 {
6638 set_t setno = *((set_t *)arg);
6639 mdsetname_t *sp;
6640 md_error_t mde = mdnullerror;
6641 int fval;
6642
6643 /* should have a set */
6644 assert(setno != NULL);
6645
6646 if ((sp = metasetnosetname(setno, &mde)) == NULL) {
6647 mde_perror(&mde, "");
6648 return (NULL);
6649 }
6650
6651 if (!(meta_is_mn_set(sp, &mde))) {
6652 mde_perror(&mde, "");
6653 return (NULL);
6654 }
6655
6656 /* fork a process */
6657 if ((fval = md_daemonize(sp, &mde)) != 0) {
6658 /*
6659 * md_daemonize will fork off a process. The is the
6660 * parent or error.
6661 */
6662 if (fval > 0) {
6663 return (NULL);
6664 }
6665 mde_perror(&mde, "");
6666 return (NULL);
6667 }
6668 /*
6669 * Child process should never return back to rpc.metad, but
6670 * should exit.
6671 * Flush all internally cached data inherited from parent process
6672 * since cached data will be cleared when parent process RPC request
6673 * has completed (which is possibly before this child process
6674 * can complete).
6675 * Child process can retrieve and cache its own copy of data from
6676 * rpc.metad that won't be changed by the parent process.
6677 *
6678 * Reset md_in_daemon since this child will be a client of rpc.metad
6679 * not part of the rpc.metad daemon itself.
6680 * md_in_daemon is used by rpc.metad so that libmeta can tell if
6681 * this thread is rpc.metad or any other thread. (If this thread
6682 * was rpc.metad it could use some short circuit code to get data
6683 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
6684 */
6685 md_in_daemon = 0;
6686 metaflushsetname(sp);
6687 sr_cache_flush_setno(setno);
6688 if ((sp = metasetnosetname(setno, &mde)) == NULL) {
6689 mde_perror(&mde, "");
6690 md_exit(sp, 1);
6691 }
6692
6693
6694 /*
6695 * Closing stdin/out/err here.
6696 */
6697 (void) close(0);
6698 (void) close(1);
6699 (void) close(2);
6700 assert(fval == 0);
6701
6702 (void) meta_sp_update_abr(sp, &mde);
6703
6704 md_exit(sp, 0);
6705 /*NOTREACHED*/
6706 return (NULL);
6707 }
6708
6709 int
meta_sp_check_component(mdsetname_t * sp,mdname_t * np,md_error_t * ep)6710 meta_sp_check_component(
6711 mdsetname_t *sp,
6712 mdname_t *np,
6713 md_error_t *ep
6714 )
6715 {
6716 md_sp_t *msp;
6717 minor_t mnum = 0;
6718 md_dev64_t dev = 0;
6719 mdnm_params_t nm;
6720 md_getdevs_params_t mgd;
6721 side_t sideno;
6722 char *miscname;
6723 md_dev64_t *mydev = NULL;
6724 char *pname = NULL, *t;
6725 char *ctd_name = NULL;
6726 char *devname = NULL;
6727 int len;
6728 int rval = -1;
6729
6730 (void) memset(&nm, '\0', sizeof (nm));
6731 if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL)
6732 return (-1);
6733
6734 if ((miscname = metagetmiscname(np, ep)) == NULL)
6735 return (-1);
6736
6737 sideno = getmyside(sp, ep);
6738
6739 meta_sp_debug("meta_sp_check_component: %s is on %s key: %d"
6740 " dev: %llu\n",
6741 np->cname, msp->compnamep->cname, msp->compnamep->key,
6742 msp->compnamep->dev);
6743
6744 /*
6745 * Now get the data from the unit structure. The compnamep stuff
6746 * contains the data from the namespace and we need the un_dev
6747 * from the unit structure.
6748 */
6749 (void) memset(&mgd, '\0', sizeof (mgd));
6750 MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
6751 mgd.cnt = 1; /* sp's only have one subdevice */
6752 mgd.mnum = meta_getminor(np->dev);
6753
6754 mydev = Zalloc(sizeof (*mydev));
6755 mgd.devs = (uintptr_t)mydev;
6756
6757 if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
6758 meta_sp_debug("meta_sp_check_component: ioctl failed\n");
6759 (void) mdstealerror(ep, &mgd.mde);
6760 rval = 0;
6761 goto out;
6762 } else if (mgd.cnt <= 0) {
6763 assert(mgd.cnt >= 0);
6764 rval = 0;
6765 goto out;
6766 }
6767
6768 /* Get the devname from the name space. */
6769 if ((devname = meta_getnmentbykey(sp->setno, sideno,
6770 msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) {
6771 meta_sp_debug("meta_sp_check_component: key %d not"
6772 "found\n", msp->compnamep->key);
6773 goto out;
6774 }
6775
6776 meta_sp_debug("dev %s from component: (%lu, %lu)\n",
6777 devname,
6778 meta_getmajor(*mydev),
6779 meta_getminor(*mydev));
6780 meta_sp_debug("minor from the namespace: %lu\n", mnum);
6781
6782 if (mnum != meta_getminor(*mydev)) {
6783 /*
6784 * The minor numbers are different. Update the namespace
6785 * with the information from the component.
6786 */
6787
6788 t = strrchr(devname, '/');
6789 t++;
6790 ctd_name = Strdup(t);
6791
6792 meta_sp_debug("meta_sp_check_component: ctd_name: %s\n",
6793 ctd_name);
6794
6795 len = strlen(devname);
6796 t = strrchr(devname, '/');
6797 t++;
6798 pname = Zalloc((len - strlen(t)) + 1);
6799 (void) strncpy(pname, devname, (len - strlen(t)));
6800 meta_sp_debug("pathname: %s\n", pname);
6801
6802 meta_sp_debug("updating the minor number to %lu\n", nm.mnum);
6803
6804 if (meta_update_namespace(sp->setno, sideno,
6805 ctd_name, *mydev, msp->compnamep->key, pname,
6806 ep) != 0) {
6807 goto out;
6808 }
6809 }
6810 out:
6811 if (pname != NULL)
6812 Free(pname);
6813 if (ctd_name != NULL)
6814 Free(ctd_name);
6815 if (devname != NULL)
6816 Free(devname);
6817 if (mydev != NULL)
6818 Free(mydev);
6819 return (rval);
6820 }
6821