1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Just in case we're not in a build environment, make sure that 29 * TEXT_DOMAIN gets set to something. 30 */ 31 #if !defined(TEXT_DOMAIN) 32 #define TEXT_DOMAIN "SYS_TEST" 33 #endif 34 35 /* 36 * soft partition operations 37 * 38 * Soft Partitions provide a virtual disk mechanism which is used to 39 * divide a large volume into many small pieces, each appearing as a 40 * separate device. A soft partition consists of a series of extents, 41 * each having an offset and a length. The extents are logically 42 * contiguous, so where the first extent leaves off the second extent 43 * picks up. Which extent a given "virtual offset" belongs to is 44 * dependent on the size of all the previous extents in the soft 45 * partition. 46 * 47 * Soft partitions are represented in memory by an extent node 48 * (sp_ext_node_t) which contains all of the information necessary to 49 * create a unit structure and update the on-disk format, called 50 * "watermarks". These extent nodes are typically kept in a doubly 51 * linked list and are manipulated by list manipulation routines. A 52 * list of extents may represent all of the soft partitions on a volume, 53 * a single soft partition, or perhaps just a set of extents that need 54 * to be updated. Extent lists may be sorted by extent or by name/seq#, 55 * depending on which compare function is used. Most of the routines 56 * require the list be sorted by offset to work, and that's the typical 57 * configuration. 58 * 59 * In order to do an allocation, knowledge of all soft partitions on the 60 * volume is required. Then free space is determined from the space 61 * that is not allocated, and new allocations can be made from the free 62 * space. Once the new allocations are made, a unit structure is created 63 * and the watermarks are updated. The status is then changed to "okay" 64 * on the unit structure to commit the transaction. If updating the 65 * watermarks fails, the unit structure is in an intermediate state and 66 * the driver will not allow access to the device. 67 * 68 * A typical sequence of events is: 69 * 1. Fetch the list of names for all soft partitions on a volume 70 * meta_sp_get_by_component() 71 * 2. Construct an extent list from the name list 72 * meta_sp_extlist_from_namelist() 73 * 3. Fill the gaps in the extent list with free extents 74 * meta_sp_list_freefill() 75 * 4. Allocate from the free extents 76 * meta_sp_alloc_by_len() 77 * meta_sp_alloc_by_list() 78 * 5. Create the unit structure from the extent list 79 * meta_sp_createunit() 80 * meta_sp_updateunit() 81 * 6. Write out the watermarks 82 * meta_sp_update_wm() 83 * 7. Set the status to "Okay" 84 * meta_sp_setstatus() 85 * 86 */ 87 88 #include <stdio.h> 89 #include <meta.h> 90 #include "meta_repartition.h" 91 #include <sys/lvm/md_sp.h> 92 #include <sys/lvm/md_crc.h> 93 #include <strings.h> 94 #include <sys/lvm/md_mirror.h> 95 #include <sys/bitmap.h> 96 97 extern int md_in_daemon; 98 99 typedef struct sp_ext_node { 100 struct sp_ext_node *ext_next; /* next element */ 101 struct sp_ext_node *ext_prev; /* previous element */ 102 sp_ext_type_t ext_type; /* type of extent */ 103 sp_ext_offset_t ext_offset; /* starting offset */ 104 sp_ext_length_t ext_length; /* length of this node */ 105 uint_t ext_flags; /* extent flags */ 106 uint32_t ext_seq; /* watermark seq no */ 107 mdname_t *ext_namep; /* name pointer */ 108 mdsetname_t *ext_setp; /* set pointer */ 109 } sp_ext_node_t; 110 111 /* extent flags */ 112 #define EXTFLG_UPDATE (1) 113 114 /* Extent node compare function for list sorting */ 115 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 116 117 118 /* Function Prototypes */ 119 120 /* Debugging Functions */ 121 static void meta_sp_debug(char *format, ...); 122 static void meta_sp_printunit(mp_unit_t *mp); 123 124 /* Misc Support Functions */ 125 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 126 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 127 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 128 md_error_t *ep); 129 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 130 mdnamelist_t **nlpp, int force, md_error_t *ep); 131 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 132 mdname_t *compnp, md_error_t *ep); 133 134 /* Extent List Manipulation Functions */ 135 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 136 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 137 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 138 sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length, 139 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 140 static void meta_sp_list_free(sp_ext_node_t **head); 141 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 142 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 143 sp_ext_type_t exttype, int exclude_wm); 144 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 145 sp_ext_offset_t offset); 146 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 147 sp_ext_length_t size); 148 static void meta_sp_list_dump(sp_ext_node_t *head); 149 static int meta_sp_list_overlaps(sp_ext_node_t *head); 150 151 /* Extent List Query Functions */ 152 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 153 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 154 sp_ext_length_t alignment); 155 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 156 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 157 md_error_t *ep); 158 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 159 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 160 161 162 /* Extent Allocation Functions */ 163 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 164 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 165 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 166 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 167 sp_ext_node_t **extlist, sp_ext_length_t *lp, 168 sp_ext_offset_t last_off, sp_ext_length_t alignment); 169 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 170 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 171 172 /* Extent List Population Functions */ 173 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 174 sp_ext_node_t **extlist, md_error_t *ep); 175 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 176 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep); 177 178 /* Print (metastat) Functions */ 179 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 180 mdprtopts_t options, md_error_t *ep); 181 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 182 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 183 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 184 185 /* Watermark Manipulation Functions */ 186 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 187 sp_ext_node_t *extlist, md_error_t *ep); 188 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 189 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 190 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 191 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 192 md_error_t *ep); 193 194 /* Unit Structure Manipulation Functions */ 195 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 196 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 197 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 198 sp_status_t status, md_error_t *ep); 199 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 200 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 201 md_error_t *ep); 202 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 203 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 204 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 205 int *repart_options, md_error_t *ep); 206 207 /* Reset (metaclear) Functions */ 208 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 209 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 210 211 /* Recovery (metarecover) Functions */ 212 static void meta_sp_display_exthdr(void); 213 static void meta_sp_display_ext(sp_ext_node_t *ext); 214 static int meta_sp_checkseq(sp_ext_node_t *extlist); 215 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 216 mdname_t **, md_error_t *); 217 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 218 mdcmdopts_t options, md_error_t *ep); 219 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 220 mdcmdopts_t options, md_error_t *ep); 221 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 222 mdcmdopts_t options, md_error_t *ep); 223 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 224 sp_ext_node_t *unitext, md_error_t *ep); 225 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 226 mdcmdopts_t options, md_error_t *ep); 227 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 228 mdcmdopts_t options, md_error_t *ep); 229 230 /* 231 * Private Constants 232 */ 233 234 static const int FORCE_RELOAD_CACHE = 1; 235 static const uint_t NO_FLAGS = 0; 236 static const sp_ext_offset_t NO_OFFSET = 0ULL; 237 static const uint_t NO_SEQUENCE_NUMBER = 0; 238 static const int ONE_SOFT_PARTITION = 1; 239 240 static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)]; 241 242 #define TEST_SOFT_PARTITION_NAMEP NULL 243 #define TEST_SETNAMEP NULL 244 245 #define EXCLUDE_WM (1) 246 #define INCLUDE_WM (0) 247 248 #define SP_UNALIGNED (0LL) 249 250 /* 251 * ************************************************************************** 252 * Debugging Functions * 253 * ************************************************************************** 254 */ 255 256 /*PRINTFLIKE1*/ 257 static void 258 meta_sp_debug(char *format, ...) 259 { 260 static int debug; 261 static int debug_set = 0; 262 va_list ap; 263 264 if (!debug_set) { 265 debug = getenv(META_SP_DEBUG) ? 1 : 0; 266 debug_set = 1; 267 } 268 269 if (debug) { 270 va_start(ap, format); 271 (void) vfprintf(stderr, format, ap); 272 va_end(ap); 273 } 274 } 275 276 static void 277 meta_sp_printunit(mp_unit_t *mp) 278 { 279 int i; 280 281 if (mp == NULL) 282 return; 283 284 /* print the common fields we know about */ 285 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 286 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 287 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 288 289 /* sp-specific fields */ 290 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 291 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 292 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 293 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 294 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 295 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 296 297 /* print extent information */ 298 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 299 for (i = 0; i < mp->un_numexts; i++) { 300 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 301 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 302 mp->un_ext[i].un_len); 303 } 304 } 305 306 /* 307 * FUNCTION: meta_sp_parsesize() 308 * INPUT: s - the string to parse 309 * OUTPUT: *szp - disk block count (0 for "all") 310 * RETURNS: -1 for error, 0 for success 311 * PURPOSE: parses the command line parameter that specifies the 312 * requested size of a soft partition. The input string 313 * is either the literal "all" or a numeric value 314 * followed by a single character, b for disk blocks, k 315 * for kilobytes, m for megabytes, g for gigabytes, or t 316 * for terabytes. p for petabytes and e for exabytes 317 * have been added as undocumented features for future 318 * expansion. For example, 100m is 100 megabytes, while 319 * 50g is 50 gigabytes. All values are rounded up to the 320 * nearest block size. 321 */ 322 int 323 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 324 { 325 if (s == NULL || szp == NULL) { 326 return (-1); 327 } 328 329 /* Check for literal "all" */ 330 if (strcasecmp(s, "all") == 0) { 331 *szp = 0; 332 return (0); 333 } 334 335 return (meta_sp_parsesizestring(s, szp)); 336 } 337 338 /* 339 * FUNCTION: meta_sp_parsesizestring() 340 * INPUT: s - the string to parse 341 * OUTPUT: *szp - disk block count 342 * RETURNS: -1 for error, 0 for success 343 * PURPOSE: parses a string that specifies size. The input string is a 344 * numeric value followed by a single character, b for disk blocks, 345 * k for kilobytes, m for megabytes, g for gigabytes, or t for 346 * terabytes. p for petabytes and e for exabytes have been added 347 * as undocumented features for future expansion. For example, 348 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 349 * are rounded up to the nearest block size. 350 */ 351 static int 352 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 353 { 354 sp_ext_length_t len = 0; 355 char len_type[2]; 356 357 if (s == NULL || szp == NULL) { 358 return (-1); 359 } 360 361 /* 362 * make sure block offset does not overflow 2^64 bytes. 363 */ 364 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 365 (len == 0LL) || 366 (len > (1LL << (64 - DEV_BSHIFT)))) 367 return (-1); 368 369 switch (len_type[0]) { 370 case 'B': 371 case 'b': 372 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 373 break; 374 case 'K': 375 case 'k': 376 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 377 break; 378 case 'M': 379 case 'm': 380 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 381 break; 382 case 'g': 383 case 'G': 384 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 385 break; 386 case 't': 387 case 'T': 388 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 389 DEV_BSIZE)); 390 break; 391 case 'p': 392 case 'P': 393 len = lbtodb(roundup( 394 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 395 DEV_BSIZE)); 396 break; 397 case 'e': 398 case 'E': 399 len = lbtodb(roundup( 400 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 401 DEV_BSIZE)); 402 break; 403 default: 404 /* error */ 405 return (-1); 406 } 407 408 *szp = len; 409 return (0); 410 } 411 412 /* 413 * FUNCTION: meta_sp_setgeom() 414 * INPUT: np - the underlying device to setup geometry for 415 * compnp - the underlying device to setup geometry for 416 * mp - the unit structure to set the geometry for 417 * OUTPUT: ep - return error pointer 418 * RETURNS: int - -1 if error, 0 otherwise 419 * PURPOSE: establishes geometry information for a device 420 */ 421 static int 422 meta_sp_setgeom( 423 mdname_t *np, 424 mdname_t *compnp, 425 mp_unit_t *mp, 426 md_error_t *ep 427 ) 428 { 429 mdgeom_t *geomp; 430 uint_t round_cyl = 0; 431 432 if ((geomp = metagetgeom(compnp, ep)) == NULL) 433 return (-1); 434 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 435 geomp->read_reinstruct, round_cyl, ep) != 0) 436 return (-1); 437 438 return (0); 439 } 440 441 /* 442 * FUNCTION: meta_sp_setstatus() 443 * INPUT: sp - the set name for the devices to set the status on 444 * minors - an array of minor numbers of devices to set status on 445 * num_units - number of entries in the array 446 * status - status value to set all units to 447 * OUTPUT: ep - return error pointer 448 * RETURNS: int - -1 if error, 0 success 449 * PURPOSE: sets the status of one or more soft partitions to the 450 * requested value 451 */ 452 int 453 meta_sp_setstatus( 454 mdsetname_t *sp, 455 minor_t *minors, 456 int num_units, 457 sp_status_t status, 458 md_error_t *ep 459 ) 460 { 461 md_sp_statusset_t status_params; 462 463 assert(minors != NULL); 464 465 /* update status of all soft partitions to the status passed in */ 466 (void) memset(&status_params, 0, sizeof (status_params)); 467 status_params.num_units = num_units; 468 status_params.new_status = status; 469 status_params.size = num_units * sizeof (minor_t); 470 status_params.minors = (uintptr_t)minors; 471 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 472 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 473 NULL) != 0) { 474 (void) mdstealerror(ep, &status_params.mde); 475 return (-1); 476 } 477 return (0); 478 } 479 480 /* 481 * FUNCTION: meta_get_sp_names() 482 * INPUT: sp - the set name to get soft partitions from 483 * options - options from the command line 484 * OUTPUT: nlpp - list of all soft partition names 485 * ep - return error pointer 486 * RETURNS: int - -1 if error, 0 success 487 * PURPOSE: returns a list of all soft partitions in the metadb 488 * for all devices in the specified set 489 */ 490 int 491 meta_get_sp_names( 492 mdsetname_t *sp, 493 mdnamelist_t **nlpp, 494 int options, 495 md_error_t *ep 496 ) 497 { 498 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 499 } 500 501 /* 502 * FUNCTION: meta_get_by_component() 503 * INPUT: sp - the set name to get soft partitions from 504 * compnp - the name of the device containing the soft 505 * partitions that will be returned 506 * force - 0 - reads cached namelist if available, 507 * 1 - reloads cached namelist, frees old namelist 508 * OUTPUT: nlpp - list of all soft partition names 509 * ep - return error pointer 510 * RETURNS: int - -1 error, otherwise the number of soft partitions 511 * found on the component (0 = none found). 512 * PURPOSE: returns a list of all soft partitions on a given device 513 * from the metadb information 514 */ 515 static int 516 meta_sp_get_by_component( 517 mdsetname_t *sp, 518 mdname_t *compnp, 519 mdnamelist_t **nlpp, 520 int force, 521 md_error_t *ep 522 ) 523 { 524 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 525 static int cached_count = 0; /* cached count */ 526 mdnamelist_t *spnlp = NULL; /* all sp names */ 527 mdnamelist_t *namep; /* list iterator */ 528 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 529 mdnamelist_t **cachetailpp; /* cache tail */ 530 md_sp_t *msp; /* unit structure */ 531 int count = 0; /* count of sp's */ 532 int err; 533 mdname_t *curnp; 534 535 if ((cached_list != NULL) && (!force)) { 536 /* return a copy of the cached list */ 537 for (namep = cached_list; namep != NULL; namep = namep->next) 538 tailpp = meta_namelist_append_wrapper(tailpp, 539 namep->namep); 540 return (cached_count); 541 } 542 543 /* free the cache and reset values to zeros to prepare for a new list */ 544 metafreenamelist(cached_list); 545 cached_count = 0; 546 cached_list = NULL; 547 cachetailpp = &cached_list; 548 *nlpp = NULL; 549 550 /* get all the softpartitions first of all */ 551 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 552 return (-1); 553 554 /* 555 * Now for each sp, see if it resides on the component we 556 * are interested in, if so then add it to our list 557 */ 558 for (namep = spnlp; namep != NULL; namep = namep->next) { 559 curnp = namep->namep; 560 561 /* get the unit structure */ 562 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 563 continue; 564 565 /* 566 * If the current soft partition is not on the same 567 * component, continue the search. If it is on the same 568 * component, add it to our namelist. 569 */ 570 err = meta_check_samedrive(compnp, msp->compnamep, ep); 571 if (err <= 0) { 572 /* not on the same device, check the next one */ 573 continue; 574 } 575 576 /* it's on the same drive */ 577 578 /* 579 * Check for overlapping partitions if the component is not 580 * a metadevice. 581 */ 582 if (!metaismeta(msp->compnamep)) { 583 /* 584 * if they're on the same drive, neither 585 * should be a metadevice if one isn't 586 */ 587 assert(!metaismeta(compnp)); 588 589 if (meta_check_overlap(msp->compnamep->cname, 590 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 591 continue; 592 593 /* in this case it's not an error for them to overlap */ 594 mdclrerror(ep); 595 } 596 597 /* Component is on the same device, add to the used list */ 598 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 599 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 600 curnp); 601 602 ++count; 603 ++cached_count; 604 } 605 606 assert(count == cached_count); 607 return (count); 608 609 out: 610 metafreenamelist(*nlpp); 611 *nlpp = NULL; 612 return (-1); 613 } 614 615 /* 616 * FUNCTION: meta_sp_get_default_alignment() 617 * INPUT: sp - the pertinent set name 618 * compnp - the name of the underlying component 619 * OUTPUT: ep - return error pointer 620 * RETURNS: sp_ext_length_t =0: no default alignment 621 * >0: default alignment 622 * PURPOSE: returns the default alignment for soft partitions to 623 * be built on top of the specified component or 624 * metadevice 625 */ 626 static sp_ext_length_t 627 meta_sp_get_default_alignment( 628 mdsetname_t *sp, 629 mdname_t *compnp, 630 md_error_t *ep 631 ) 632 { 633 sp_ext_length_t a = SP_UNALIGNED; 634 char *mname; 635 636 assert(compnp != NULL); 637 638 /* 639 * We treat raw devices as opaque, and assume nothing about 640 * their alignment requirements. 641 */ 642 if (!metaismeta(compnp)) 643 return (SP_UNALIGNED); 644 645 /* 646 * We already know it's a metadevice from the previous test; 647 * metagetmiscname() will tell us which metadevice type we 648 * have 649 */ 650 mname = metagetmiscname(compnp, ep); 651 if (mname == NULL) 652 goto out; 653 654 /* 655 * For a mirror, we want to deal with the stripe that is the 656 * primary side. If it happens to be asymmetrically 657 * configured, there is no simple way to fake a universal 658 * alignment. There's a chance that the least common 659 * denominator of the set of interlaces from all stripes of 660 * all submirrors would do it, but nobody that really cared 661 * that much about this issue would create an asymmetric 662 * config to start with. 663 * 664 * If the component underlying the soft partition is a mirror, 665 * then at the exit of this loop, compnp will have been 666 * updated to describe the first active submirror. 667 */ 668 if (strcmp(mname, MD_MIRROR) == 0) { 669 md_mirror_t *mp; 670 int smi; 671 md_submirror_t *smp; 672 673 mp = meta_get_mirror(sp, compnp, ep); 674 if (mp == NULL) 675 goto out; 676 677 for (smi = 0; smi < NMIRROR; smi++) { 678 679 smp = &mp->submirrors[smi]; 680 if (smp->state == SMS_UNUSED) 681 continue; 682 683 compnp = smp->submirnamep; 684 assert(compnp != NULL); 685 686 mname = metagetmiscname(compnp, ep); 687 if (mname == NULL) 688 goto out; 689 690 break; 691 } 692 693 if (smi == NMIRROR) 694 goto out; 695 } 696 697 /* 698 * Handle stripes and submirrors identically; just return the 699 * interlace of the first row. 700 */ 701 if (strcmp(mname, MD_STRIPE) == 0) { 702 md_stripe_t *stp; 703 704 stp = meta_get_stripe(sp, compnp, ep); 705 if (stp == NULL) 706 goto out; 707 708 a = stp->rows.rows_val[0].interlace; 709 goto out; 710 } 711 712 /* 713 * Raid is even more straightforward; the interlace applies to 714 * the entire device. 715 */ 716 if (strcmp(mname, MD_RAID) == 0) { 717 md_raid_t *rp; 718 719 rp = meta_get_raid(sp, compnp, ep); 720 if (rp == NULL) 721 goto out; 722 723 a = rp->interlace; 724 goto out; 725 } 726 727 /* 728 * If we have arrived here with the alignment still not set, 729 * then we expect the error to have been set by one of the 730 * routines we called. If neither is the case, something has 731 * really gone wrong above. (Probably the submirror walk 732 * failed to produce a valid submirror, but that would be 733 * really bad...) 734 */ 735 out: 736 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 737 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 738 739 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 740 mde_perror(ep, NULL); 741 } 742 743 assert((a > 0) || (!mdisok(ep))); 744 745 return (a); 746 } 747 748 749 750 /* 751 * FUNCTION: meta_check_insp() 752 * INPUT: sp - the set name for the device to check 753 * np - the name of the device to check 754 * slblk - the starting offset of the device to check 755 * nblks - the number of blocks in the device to check 756 * OUTPUT: ep - return error pointer 757 * RETURNS: int - 0 - device contains soft partitions 758 * -1 - device does not contain soft partitions 759 * PURPOSE: determines whether a device contains any soft partitions 760 */ 761 /* ARGSUSED */ 762 int 763 meta_check_insp( 764 mdsetname_t *sp, 765 mdname_t *np, 766 diskaddr_t slblk, 767 diskaddr_t nblks, 768 md_error_t *ep 769 ) 770 { 771 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 772 int count; 773 int rval; 774 775 /* check set pointer */ 776 assert(sp != NULL); 777 778 /* 779 * Get a list of the soft partitions that currently reside on 780 * the component. We should ALWAYS force reload the cache, 781 * because if we're using the md.tab, we must rebuild 782 * the list because it won't contain the previous (if any) 783 * soft partition. 784 */ 785 /* find all soft partitions on the component */ 786 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 787 788 if (count == -1) { 789 rval = -1; 790 } else if (count > 0) { 791 rval = mduseerror(ep, MDE_ALREADY, np->dev, 792 spnlp->namep->cname, np->cname); 793 } else { 794 rval = 0; 795 } 796 797 metafreenamelist(spnlp); 798 return (rval); 799 } 800 801 /* 802 * ************************************************************************** 803 * Extent List Manipulation Functions * 804 * ************************************************************************** 805 */ 806 807 /* 808 * FUNCTION: meta_sp_cmp_by_nameseq() 809 * INPUT: e1 - first node to compare 810 * e2 - second node to compare 811 * OUTPUT: none 812 * RETURNS: int - =0 - nodes are equal 813 * <0 - e1 should go before e2 814 * >0 - e1 should go after e2 815 * PURPOSE: used for sorted list inserts to build a list sorted by 816 * name first and sequence number second. 817 */ 818 static int 819 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 820 { 821 int rval; 822 823 if (e1->ext_namep == NULL) 824 return (1); 825 if (e2->ext_namep == NULL) 826 return (-1); 827 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 828 return (rval); 829 830 /* the names are equal, compare sequence numbers */ 831 if (e1->ext_seq > e2->ext_seq) 832 return (1); 833 if (e1->ext_seq < e2->ext_seq) 834 return (-1); 835 /* sequence numbers are also equal */ 836 return (0); 837 } 838 839 /* 840 * FUNCTION: meta_sp_cmp_by_offset() 841 * INPUT: e1 - first node to compare 842 * e2 - second node to compare 843 * OUTPUT: none 844 * RETURNS: int - =0 - nodes are equal 845 * <0 - e1 should go before e2 846 * >0 - e1 should go after e2 847 * PURPOSE: used for sorted list inserts to build a list sorted by offset 848 */ 849 static int 850 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 851 { 852 if (e1->ext_offset > e2->ext_offset) 853 return (1); 854 if (e1->ext_offset < e2->ext_offset) 855 return (-1); 856 /* offsets are equal */ 857 return (0); 858 } 859 860 /* 861 * FUNCTION: meta_sp_list_insert() 862 * INPUT: sp - the set name for the device the node belongs to 863 * np - the name of the device the node belongs to 864 * head - the head of the list, must be NULL for empty list 865 * offset - the physical offset of this extent in sectors 866 * length - the length of this extent in sectors 867 * type - the type of the extent being inserted 868 * seq - the sequence number of the extent being inserted 869 * flags - extent flags (eg. whether it needs to be updated) 870 * compare - the compare function to use 871 * OUTPUT: head - points to the new head if a node was inserted 872 * at the beginning 873 * RETURNS: void 874 * PURPOSE: inserts an extent node into a sorted doubly linked list. 875 * The sort order is determined by the compare function. 876 * Memory is allocated for the node in this function and it 877 * is up to the caller to free it, possibly using 878 * meta_sp_list_free(). If a node is inserted at the 879 * beginning of the list, the head pointer is updated to 880 * point to the new first node. 881 */ 882 static void 883 meta_sp_list_insert( 884 mdsetname_t *sp, 885 mdname_t *np, 886 sp_ext_node_t **head, 887 sp_ext_offset_t offset, 888 sp_ext_length_t length, 889 sp_ext_type_t type, 890 uint_t seq, 891 uint_t flags, 892 ext_cmpfunc_t compare 893 ) 894 { 895 sp_ext_node_t *newext; 896 sp_ext_node_t *curext; 897 898 assert(head != NULL); 899 900 /* Don't bother adding zero length nodes */ 901 if (length == 0ULL) 902 return; 903 904 /* allocate and fill in new ext_node */ 905 newext = Zalloc(sizeof (sp_ext_node_t)); 906 907 newext->ext_offset = offset; 908 newext->ext_length = length; 909 newext->ext_flags = flags; 910 newext->ext_type = type; 911 newext->ext_seq = seq; 912 newext->ext_setp = sp; 913 newext->ext_namep = np; 914 915 /* first node in the list */ 916 if (*head == NULL) { 917 newext->ext_next = newext->ext_prev = NULL; 918 *head = newext; 919 } else if ((*compare)(*head, newext) >= 0) { 920 /* the first node has a bigger offset, so insert before it */ 921 assert((*head)->ext_prev == NULL); 922 923 newext->ext_prev = NULL; 924 newext->ext_next = *head; 925 (*head)->ext_prev = newext; 926 *head = newext; 927 } else { 928 /* 929 * find the next node whose offset is greater than 930 * the one we want to insert, or the end of the list. 931 */ 932 for (curext = *head; 933 (curext->ext_next != NULL) && 934 ((*compare)(curext->ext_next, newext) < 0); 935 (curext = curext->ext_next)) 936 ; 937 938 /* link the new node in after the current node */ 939 newext->ext_next = curext->ext_next; 940 newext->ext_prev = curext; 941 942 if (curext->ext_next != NULL) 943 curext->ext_next->ext_prev = newext; 944 945 curext->ext_next = newext; 946 } 947 } 948 949 /* 950 * FUNCTION: meta_sp_list_free() 951 * INPUT: head - the head of the list, must be NULL for empty list 952 * OUTPUT: head - points to NULL on return 953 * RETURNS: void 954 * PURPOSE: walks a double linked extent list and frees each node 955 */ 956 static void 957 meta_sp_list_free(sp_ext_node_t **head) 958 { 959 sp_ext_node_t *ext; 960 sp_ext_node_t *next; 961 962 assert(head != NULL); 963 964 ext = *head; 965 while (ext) { 966 next = ext->ext_next; 967 Free(ext); 968 ext = next; 969 } 970 *head = NULL; 971 } 972 973 /* 974 * FUNCTION: meta_sp_list_remove() 975 * INPUT: head - the head of the list, must be NULL for empty list 976 * ext - the extent to remove, must be a member of the list 977 * OUTPUT: head - points to the new head of the list 978 * RETURNS: void 979 * PURPOSE: unlinks the node specified by ext from the list and 980 * frees it, possibly moving the head pointer forward if 981 * the head is the node being removed. 982 */ 983 static void 984 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 985 { 986 assert(head != NULL); 987 assert(*head != NULL); 988 989 if (*head == ext) 990 *head = ext->ext_next; 991 992 if (ext->ext_prev != NULL) 993 ext->ext_prev->ext_next = ext->ext_next; 994 if (ext->ext_next != NULL) 995 ext->ext_next->ext_prev = ext->ext_prev; 996 Free(ext); 997 } 998 999 /* 1000 * FUNCTION: meta_sp_list_size() 1001 * INPUT: head - the head of the list, must be NULL for empty list 1002 * exttype - the type of the extents to sum 1003 * exclude_wm - subtract space for extent headers from total 1004 * OUTPUT: none 1005 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1006 * PURPOSE: sums the lengths of all extents in the list matching the 1007 * specified type. This could be used for computing the 1008 * amount of free or used space, for example. 1009 */ 1010 static sp_ext_length_t 1011 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1012 { 1013 sp_ext_node_t *ext; 1014 sp_ext_length_t size = 0LL; 1015 1016 for (ext = head; ext != NULL; ext = ext->ext_next) 1017 if (ext->ext_type == exttype) 1018 size += ext->ext_length - 1019 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1020 1021 return (size); 1022 } 1023 1024 /* 1025 * FUNCTION: meta_sp_list_find() 1026 * INPUT: head - the head of the list, must be NULL for empty list 1027 * offset - the offset contained by the node to find 1028 * OUTPUT: none 1029 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1030 * or NULL if no such nodes were found. 1031 * PURPOSE: finds a node in a list containing the requested offset 1032 * (inclusive). If multiple nodes contain this offset then 1033 * only the first will be returned, though typically these 1034 * lists are managed with non-overlapping nodes. 1035 * 1036 * *The list MUST be sorted by offset for this function to work.* 1037 */ 1038 static sp_ext_node_t * 1039 meta_sp_list_find( 1040 sp_ext_node_t *head, 1041 sp_ext_offset_t offset 1042 ) 1043 { 1044 sp_ext_node_t *ext; 1045 1046 for (ext = head; ext != NULL; ext = ext->ext_next) { 1047 /* check if the offset lies within this extent */ 1048 if ((offset >= ext->ext_offset) && 1049 (offset < ext->ext_offset + ext->ext_length)) { 1050 /* 1051 * the requested extent should always be a 1052 * subset of an extent in the list. 1053 */ 1054 return (ext); 1055 } 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * FUNCTION: meta_sp_list_freefill() 1062 * INPUT: head - the head of the list, must be NULL for empty list 1063 * size - the size of the volume this extent list is 1064 * representing 1065 * OUTPUT: head - the new head of the list 1066 * RETURNS: void 1067 * PURPOSE: finds gaps in the extent list and fills them with a free 1068 * node. If there is a gap at the beginning the head 1069 * pointer will be changed to point to the new free node. 1070 * If there is free space at the end, the last free extent 1071 * will extend all the way out to the size specified. 1072 * 1073 * *The list MUST be sorted by offset for this function to work.* 1074 */ 1075 static void 1076 meta_sp_list_freefill( 1077 sp_ext_node_t **head, 1078 sp_ext_length_t size 1079 ) 1080 { 1081 sp_ext_node_t *ext; 1082 sp_ext_offset_t curoff = 0LL; 1083 1084 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1085 if (curoff < ext->ext_offset) 1086 meta_sp_list_insert(NULL, NULL, head, 1087 curoff, ext->ext_offset - curoff, 1088 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1089 curoff = ext->ext_offset + ext->ext_length; 1090 } 1091 1092 /* pad inverse list out to the end */ 1093 if (curoff < size) 1094 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1095 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1096 1097 if (getenv(META_SP_DEBUG)) { 1098 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1099 "holes freefilled:\n"); 1100 meta_sp_list_dump(*head); 1101 } 1102 } 1103 1104 /* 1105 * FUNCTION: meta_sp_list_dump() 1106 * INPUT: head - the head of the list, must be NULL for empty list 1107 * OUTPUT: none 1108 * RETURNS: void 1109 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1110 */ 1111 static void 1112 meta_sp_list_dump(sp_ext_node_t *head) 1113 { 1114 sp_ext_node_t *ext; 1115 1116 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1117 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1118 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1119 "Next"); 1120 for (ext = head; ext != NULL; ext = ext->ext_next) { 1121 if (ext->ext_namep != NULL) 1122 meta_sp_debug("%5s", ext->ext_namep->cname); 1123 else 1124 meta_sp_debug("%5s", "NONE"); 1125 1126 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1127 switch (ext->ext_type) { 1128 case EXTTYP_ALLOC: 1129 meta_sp_debug("%7s ", "ALLOC"); 1130 break; 1131 case EXTTYP_FREE: 1132 meta_sp_debug("%7s ", "FREE"); 1133 break; 1134 case EXTTYP_END: 1135 meta_sp_debug("%7s ", "END"); 1136 break; 1137 case EXTTYP_RESERVED: 1138 meta_sp_debug("%7s ", "RESV"); 1139 break; 1140 default: 1141 meta_sp_debug("%7s ", "INVLD"); 1142 break; 1143 } 1144 1145 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1146 ext->ext_offset, ext->ext_length, 1147 ext->ext_flags, (void *) ext->ext_prev, 1148 (void *) ext->ext_next); 1149 } 1150 meta_sp_debug("\n"); 1151 } 1152 1153 /* 1154 * FUNCTION: meta_sp_list_overlaps() 1155 * INPUT: head - the head of the list, must be NULL for empty list 1156 * OUTPUT: none 1157 * RETURNS: int - 1 if extents overlap, 0 if ok 1158 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1159 * offset for this function to work properly. 1160 */ 1161 static int 1162 meta_sp_list_overlaps(sp_ext_node_t *head) 1163 { 1164 sp_ext_node_t *ext; 1165 1166 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1167 if (ext->ext_offset + ext->ext_length > 1168 ext->ext_next->ext_offset) 1169 return (1); 1170 } 1171 return (0); 1172 } 1173 1174 /* 1175 * ************************************************************************** 1176 * Extent Allocation Functions * 1177 * ************************************************************************** 1178 */ 1179 1180 /* 1181 * FUNCTION: meta_sp_alloc_by_ext() 1182 * INPUT: sp - the set name for the device the node belongs to 1183 * np - the name of the device the node belongs to 1184 * head - the head of the list, must be NULL for empty list 1185 * free_ext - the free extent being allocated from 1186 * alloc_offset - the offset of the allocation 1187 * alloc_len - the length of the allocation 1188 * seq - the sequence number of the allocation 1189 * OUTPUT: head - the new head pointer 1190 * RETURNS: void 1191 * PURPOSE: allocates a portion of the free extent free_ext. The 1192 * allocated portion starts at alloc_offset and is 1193 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1194 * alloc_length) must be contained within the free extent. 1195 * 1196 * The free extent is split into as many as 3 pieces - a 1197 * free extent containing [ free_offset .. alloc_offset ), an 1198 * allocated extent containing the range [ alloc_offset .. 1199 * alloc_end ], and another free extent containing the 1200 * range ( alloc_end .. free_end ]. If either of the two 1201 * new free extents would be zero length, they are not created. 1202 * 1203 * Finally, the original free extent is removed. All newly 1204 * created extents have the EXTFLG_UPDATE flag set. 1205 */ 1206 static void 1207 meta_sp_alloc_by_ext( 1208 mdsetname_t *sp, 1209 mdname_t *np, 1210 sp_ext_node_t **head, 1211 sp_ext_node_t *free_ext, 1212 sp_ext_offset_t alloc_offset, 1213 sp_ext_length_t alloc_length, 1214 uint_t seq 1215 ) 1216 { 1217 sp_ext_offset_t free_offset = free_ext->ext_offset; 1218 sp_ext_length_t free_length = free_ext->ext_length; 1219 1220 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1221 sp_ext_offset_t free_end = free_offset + free_length; 1222 1223 /* allocated extent must be a subset of the free extent */ 1224 assert(free_offset <= alloc_offset); 1225 assert(free_end >= alloc_end); 1226 1227 meta_sp_list_remove(head, free_ext); 1228 1229 if (free_offset < alloc_offset) { 1230 meta_sp_list_insert(NULL, NULL, head, free_offset, 1231 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1232 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1233 } 1234 1235 if (free_end > alloc_end) { 1236 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1237 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1238 meta_sp_cmp_by_offset); 1239 } 1240 1241 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1242 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1243 1244 if (getenv(META_SP_DEBUG)) { 1245 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1246 meta_sp_list_dump(*head); 1247 } 1248 } 1249 1250 /* 1251 * FUNCTION: meta_sp_alloc_by_len() 1252 * INPUT: sp - the set name for the device the node belongs to 1253 * np - the name of the device the node belongs to 1254 * head - the head of the list, must be NULL for empty list 1255 * *lp - the requested length to allocate 1256 * last_off - the last offset already allocated. 1257 * alignment - the desired extent alignmeent 1258 * OUTPUT: head - the new head pointer 1259 * *lp - the length allocated 1260 * RETURNS: int - -1 if error, the number of new extents on success 1261 * PURPOSE: allocates extents from free space to satisfy the requested 1262 * length. If requested length is zero, allocates all 1263 * remaining free space. This function provides the meat 1264 * of the extent allocation algorithm. Allocation is a 1265 * three tier process: 1266 * 1267 * 1. If last_off is nonzero and there is free space following 1268 * that node, then it is extended to allocate as much of that 1269 * free space as possible. This is useful for metattach. 1270 * 2. If a free extent can be found to satisfy the remaining 1271 * requested space, then satisfy the rest of the request 1272 * from that extent. 1273 * 3. Start allocating space from any remaining free extents until 1274 * the remainder of the request is satisified. 1275 * 1276 * If alignment is non-zero, then every extent modified 1277 * or newly allocated will be aligned modulo alignment, 1278 * with a length that is an integer multiple of 1279 * alignment. 1280 * 1281 * The EXTFLG_UPDATE flag is set for all nodes (free and 1282 * allocated) that require updated watermarks. 1283 * 1284 * This algorithm may have a negative impact on fragmentation 1285 * in pathological cases and may be improved if it turns out 1286 * to be a problem. This may be exacerbated by particularly 1287 * large alignments. 1288 * 1289 * NOTE: It's confusing, so it demands an explanation: 1290 * - len is used to represent requested data space; it 1291 * does not include room for a watermark. On each full 1292 * or partial allocation, len will be decremented by 1293 * alloc_len (see next paragraph) until it reaches 1294 * zero. 1295 * - alloc_len is used to represent data space allocated 1296 * from a particular extent; it does not include space 1297 * for a watermark. In the rare event that a_length 1298 * (see next paragraph) is equal to MD_SP_WMSIZE, 1299 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1300 * fragment of space will be utterly unusable. 1301 * - a_length is used to represent all space to be 1302 * allocated from a particular extent; it DOES include 1303 * space for a watermark. 1304 */ 1305 static int 1306 meta_sp_alloc_by_len( 1307 mdsetname_t *sp, 1308 mdname_t *np, 1309 sp_ext_node_t **head, 1310 sp_ext_length_t *lp, 1311 sp_ext_offset_t last_off, 1312 sp_ext_offset_t alignment 1313 ) 1314 { 1315 sp_ext_node_t *free_ext; 1316 sp_ext_node_t *alloc_ext; 1317 uint_t last_seq = 0; 1318 uint_t numexts = 0; 1319 sp_ext_length_t freespace; 1320 sp_ext_length_t alloc_len; 1321 sp_ext_length_t len; 1322 1323 /* We're DOA if we can't read *lp */ 1324 assert(lp != NULL); 1325 len = *lp; 1326 1327 /* 1328 * Process the nominal case first: we've been given an actual 1329 * size argument, rather than the literal "all" 1330 */ 1331 1332 if (len != 0) { 1333 1334 /* 1335 * Short circuit the check for free space. This may 1336 * tell us we have enough space when we really don't 1337 * because each extent loses space to a watermark, but 1338 * it will always tell us there isn't enough space 1339 * correctly. Worst case we do some extra work. 1340 */ 1341 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1342 INCLUDE_WM); 1343 1344 if (freespace < len) 1345 return (-1); 1346 1347 /* 1348 * First see if we can extend the last extent for an 1349 * attach. 1350 */ 1351 if (last_off != 0LL) { 1352 int align = 0; 1353 1354 alloc_ext = 1355 meta_sp_list_find(*head, last_off); 1356 assert(alloc_ext != NULL); 1357 1358 /* 1359 * The offset test reflects the 1360 * inclusion of the watermark in the extent 1361 */ 1362 align = (alignment > 0) && 1363 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1364 alignment) == 0); 1365 1366 /* 1367 * If we decided not to align here, we should 1368 * also reset "alignment" so we don't bother 1369 * later, either. 1370 */ 1371 if (!align) { 1372 alignment = 0; 1373 } 1374 1375 last_seq = alloc_ext->ext_seq; 1376 1377 free_ext = meta_sp_list_find(*head, 1378 alloc_ext->ext_offset + 1379 alloc_ext->ext_length); 1380 1381 /* 1382 * If a free extent follows our last allocated 1383 * extent, then remove the last allocated 1384 * extent and increase the size of the free 1385 * extent to overlap it, then allocate the 1386 * total space from the new free extent. 1387 */ 1388 if (free_ext != NULL && 1389 free_ext->ext_type == EXTTYP_FREE) { 1390 assert(free_ext->ext_offset == 1391 alloc_ext->ext_offset + 1392 alloc_ext->ext_length); 1393 1394 alloc_len = 1395 MIN(len, free_ext->ext_length); 1396 1397 if (align && (alloc_len < len)) { 1398 /* No watermark space needed */ 1399 alloc_len -= alloc_len % alignment; 1400 } 1401 1402 if (alloc_len > 0) { 1403 free_ext->ext_offset -= 1404 alloc_ext->ext_length; 1405 free_ext->ext_length += 1406 alloc_ext->ext_length; 1407 1408 meta_sp_alloc_by_ext(sp, np, head, 1409 free_ext, free_ext->ext_offset, 1410 alloc_ext->ext_length + alloc_len, 1411 last_seq); 1412 1413 /* 1414 * now remove the original allocated 1415 * node. We may have overlapping 1416 * extents for a short time before 1417 * this node is removed. 1418 */ 1419 meta_sp_list_remove(head, alloc_ext); 1420 len -= alloc_len; 1421 } 1422 } 1423 last_seq++; 1424 } 1425 1426 if (len == 0LL) 1427 goto out; 1428 1429 /* 1430 * Next, see if we can find a single allocation for 1431 * the remainder. This may make fragmentation worse 1432 * in some cases, but there's no good way to allocate 1433 * that doesn't have a highly fragmented corner case. 1434 */ 1435 for (free_ext = *head; free_ext != NULL; 1436 free_ext = free_ext->ext_next) { 1437 sp_ext_offset_t a_offset; 1438 sp_ext_offset_t a_length; 1439 1440 if (free_ext->ext_type != EXTTYP_FREE) 1441 continue; 1442 1443 /* 1444 * The length test should include space for 1445 * the watermark 1446 */ 1447 1448 a_offset = free_ext->ext_offset; 1449 a_length = free_ext->ext_length; 1450 1451 if (alignment > 0) { 1452 1453 /* 1454 * Shortcut for extents that have been 1455 * previously added to pad out the 1456 * data space 1457 */ 1458 if (a_length < alignment) { 1459 continue; 1460 } 1461 1462 /* 1463 * Round up so the data space begins 1464 * on a properly aligned boundary. 1465 */ 1466 a_offset += alignment - 1467 (a_offset % alignment) - MD_SP_WMSIZE; 1468 1469 /* 1470 * This is only necessary in case the 1471 * watermark size is ever greater than 1472 * one. It'll never happen, of 1473 * course; we'll get rid of watermarks 1474 * before we make 'em bigger. 1475 */ 1476 if (a_offset < free_ext->ext_offset) { 1477 a_offset += alignment; 1478 } 1479 1480 /* 1481 * Adjust the length to account for 1482 * the space lost above (if any) 1483 */ 1484 a_length -= 1485 (a_offset - free_ext->ext_offset); 1486 } 1487 1488 if (a_length >= len + MD_SP_WMSIZE) { 1489 meta_sp_alloc_by_ext(sp, np, head, 1490 free_ext, a_offset, 1491 len + MD_SP_WMSIZE, last_seq); 1492 1493 len = 0LL; 1494 numexts++; 1495 break; 1496 } 1497 } 1498 1499 if (len == 0LL) 1500 goto out; 1501 1502 1503 /* 1504 * If the request could not be satisfied by extending 1505 * the last extent or by a single extent, then put 1506 * multiple smaller extents together until the request 1507 * is satisfied. 1508 */ 1509 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1510 free_ext = free_ext->ext_next) { 1511 sp_ext_offset_t a_offset; 1512 sp_ext_length_t a_length; 1513 1514 if (free_ext->ext_type != EXTTYP_FREE) 1515 continue; 1516 1517 a_offset = free_ext->ext_offset; 1518 a_length = free_ext->ext_length; 1519 1520 if (alignment > 0) { 1521 1522 /* 1523 * Shortcut for extents that have been 1524 * previously added to pad out the 1525 * data space 1526 */ 1527 if (a_length < alignment) { 1528 continue; 1529 } 1530 1531 /* 1532 * Round up so the data space begins 1533 * on a properly aligned boundary. 1534 */ 1535 a_offset += alignment - 1536 (a_offset % alignment) - MD_SP_WMSIZE; 1537 1538 /* 1539 * This is only necessary in case the 1540 * watermark size is ever greater than 1541 * one. It'll never happen, of 1542 * course; we'll get rid of watermarks 1543 * before we make 'em bigger. 1544 */ 1545 if (a_offset < free_ext->ext_offset) { 1546 a_offset += alignment; 1547 } 1548 1549 /* 1550 * Adjust the length to account for 1551 * the space lost above (if any) 1552 */ 1553 a_length -= 1554 (a_offset - free_ext->ext_offset); 1555 1556 /* 1557 * Adjust the length to be properly 1558 * aligned if it is NOT to be the 1559 * last extent in the soft partition. 1560 */ 1561 if ((a_length - MD_SP_WMSIZE) < len) 1562 a_length -= 1563 (a_length - MD_SP_WMSIZE) 1564 % alignment; 1565 } 1566 1567 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1568 if (alloc_len == 0) 1569 continue; 1570 1571 /* 1572 * meta_sp_alloc_by_ext() expects the 1573 * allocation length to include the watermark 1574 * size, which is why we don't simply pass in 1575 * alloc_len here. 1576 */ 1577 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1578 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1579 last_seq); 1580 1581 len -= alloc_len; 1582 numexts++; 1583 last_seq++; 1584 } 1585 1586 1587 /* 1588 * If there was not enough space we can throw it all 1589 * away since no real work has been done yet. 1590 */ 1591 if (len != 0) { 1592 meta_sp_list_free(head); 1593 return (-1); 1594 } 1595 } 1596 1597 /* 1598 * Otherwise, the literal "all" was specified: allocate all 1599 * available free space. Don't bother with alignment. 1600 */ 1601 else { 1602 /* First, extend the last extent if this is a grow */ 1603 if (last_off != 0LL) { 1604 alloc_ext = 1605 meta_sp_list_find(*head, last_off); 1606 assert(alloc_ext != NULL); 1607 1608 last_seq = alloc_ext->ext_seq; 1609 1610 free_ext = meta_sp_list_find(*head, 1611 alloc_ext->ext_offset + 1612 alloc_ext->ext_length); 1613 1614 /* 1615 * If a free extent follows our last allocated 1616 * extent, then remove the last allocated 1617 * extent and increase the size of the free 1618 * extent to overlap it, then allocate the 1619 * total space from the new free extent. 1620 */ 1621 if (free_ext != NULL && 1622 free_ext->ext_type == EXTTYP_FREE) { 1623 assert(free_ext->ext_offset == 1624 alloc_ext->ext_offset + 1625 alloc_ext->ext_length); 1626 1627 len = alloc_len = 1628 free_ext->ext_length; 1629 1630 free_ext->ext_offset -= 1631 alloc_ext->ext_length; 1632 free_ext->ext_length += 1633 alloc_ext->ext_length; 1634 1635 meta_sp_alloc_by_ext(sp, np, head, 1636 free_ext, free_ext->ext_offset, 1637 alloc_ext->ext_length + alloc_len, 1638 last_seq); 1639 1640 /* 1641 * now remove the original allocated 1642 * node. We may have overlapping 1643 * extents for a short time before 1644 * this node is removed. 1645 */ 1646 meta_sp_list_remove(head, alloc_ext); 1647 } 1648 1649 last_seq++; 1650 } 1651 1652 /* Next, grab all remaining free space */ 1653 for (free_ext = *head; free_ext != NULL; 1654 free_ext = free_ext->ext_next) { 1655 1656 if (free_ext->ext_type == EXTTYP_FREE) { 1657 alloc_len = 1658 free_ext->ext_length - MD_SP_WMSIZE; 1659 if (alloc_len == 0) 1660 continue; 1661 1662 /* 1663 * meta_sp_alloc_by_ext() expects the 1664 * allocation length to include the 1665 * watermark size, which is why we 1666 * don't simply pass in alloc_len 1667 * here. 1668 */ 1669 meta_sp_alloc_by_ext(sp, np, head, 1670 free_ext, free_ext->ext_offset, 1671 free_ext->ext_length, 1672 last_seq); 1673 1674 len += alloc_len; 1675 numexts++; 1676 last_seq++; 1677 } 1678 } 1679 } 1680 1681 out: 1682 if (getenv(META_SP_DEBUG)) { 1683 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1684 "allocation:\n"); 1685 meta_sp_list_dump(*head); 1686 } 1687 1688 if (*lp == 0) { 1689 *lp = len; 1690 1691 /* 1692 * Make sure the callers hit a no space error if we 1693 * didn't actually find anything. 1694 */ 1695 if (len == 0) { 1696 return (-1); 1697 } 1698 } 1699 1700 return (numexts); 1701 } 1702 1703 /* 1704 * FUNCTION: meta_sp_alloc_by_list() 1705 * INPUT: sp - the set name for the device the node belongs to 1706 * np - the name of the device the node belongs to 1707 * head - the head of the list, must be NULL for empty list 1708 * oblist - an extent list containing requested nodes to allocate 1709 * OUTPUT: head - the new head pointer 1710 * RETURNS: int - -1 if error, the number of new extents on success 1711 * PURPOSE: allocates extents from free space to satisfy the requested 1712 * extent list. This is primarily used for the -o/-b options 1713 * where the user may specifically request extents to allocate. 1714 * Each extent in the oblist must be a subset (inclusive) of a 1715 * free extent and may not overlap each other. This 1716 * function sets the EXTFLG_UPDATE flag for each node that 1717 * requires a watermark update after allocating. 1718 */ 1719 static int 1720 meta_sp_alloc_by_list( 1721 mdsetname_t *sp, 1722 mdname_t *np, 1723 sp_ext_node_t **head, 1724 sp_ext_node_t *oblist 1725 ) 1726 { 1727 sp_ext_node_t *ext; 1728 sp_ext_node_t *free_ext; 1729 uint_t numexts = 0; 1730 1731 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1732 1733 free_ext = meta_sp_list_find(*head, 1734 ext->ext_offset - MD_SP_WMSIZE); 1735 1736 /* Make sure the allocation is within the free extent */ 1737 if ((free_ext == NULL) || 1738 (ext->ext_offset + ext->ext_length > 1739 free_ext->ext_offset + free_ext->ext_length) || 1740 (free_ext->ext_type != EXTTYP_FREE)) 1741 return (-1); 1742 1743 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1744 ext->ext_offset - MD_SP_WMSIZE, 1745 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1746 1747 numexts++; 1748 } 1749 1750 assert(meta_sp_list_overlaps(*head) == 0); 1751 1752 if (getenv(META_SP_DEBUG)) { 1753 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1754 "allocation:\n"); 1755 meta_sp_list_dump(*head); 1756 } 1757 1758 return (numexts); 1759 } 1760 1761 /* 1762 * ************************************************************************** 1763 * Extent List Population Functions * 1764 * ************************************************************************** 1765 */ 1766 1767 /* 1768 * FUNCTION: meta_sp_extlist_from_namelist() 1769 * INPUT: sp - the set name for the device the node belongs to 1770 * spnplp - the namelist of soft partitions to build a list from 1771 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1772 * ep - return error pointer 1773 * RETURNS: int - -1 if error, 0 on success 1774 * PURPOSE: builds an extent list representing the soft partitions 1775 * specified in the namelist. Each extent in each soft 1776 * partition is added to the list with the type EXTTYP_ALLOC. 1777 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1778 * extent in the list includes the space occupied by the 1779 * watermark, which is not included in the unit structures. 1780 */ 1781 static int 1782 meta_sp_extlist_from_namelist( 1783 mdsetname_t *sp, 1784 mdnamelist_t *spnlp, 1785 sp_ext_node_t **extlist, 1786 md_error_t *ep 1787 ) 1788 { 1789 int extn; 1790 md_sp_t *msp; /* unit structure of the sp's */ 1791 mdnamelist_t *namep; 1792 1793 assert(sp != NULL); 1794 1795 /* 1796 * Now go through the soft partitions and add a node to the used 1797 * list for each allocated extent. 1798 */ 1799 for (namep = spnlp; namep != NULL; namep = namep->next) { 1800 mdname_t *curnp = namep->namep; 1801 1802 /* get the unit structure */ 1803 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1804 return (-1); 1805 1806 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1807 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1808 1809 /* 1810 * subtract from offset and add to the length 1811 * to account for the watermark, which is not 1812 * contained in the extents in the unit structure. 1813 */ 1814 meta_sp_list_insert(sp, curnp, extlist, 1815 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1816 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1817 } 1818 } 1819 return (0); 1820 } 1821 1822 /* 1823 * FUNCTION: meta_sp_extlist_from_wm() 1824 * INPUT: sp - the set name for the device the node belongs to 1825 * compnp - the name of the device to scan watermarks on 1826 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1827 * ep - return error pointer 1828 * RETURNS: int - -1 if error, 0 on success 1829 * PURPOSE: builds an extent list representing the soft partitions 1830 * specified in the namelist. Each extent in each soft 1831 * partition is added to the list with the type EXTTYP_ALLOC. 1832 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1833 * extent in the list includes the space occupied by the 1834 * watermark, which is not included in the unit structures. 1835 */ 1836 static int 1837 meta_sp_extlist_from_wm( 1838 mdsetname_t *sp, 1839 mdname_t *compnp, 1840 sp_ext_node_t **extlist, 1841 ext_cmpfunc_t compare, 1842 md_error_t *ep 1843 ) 1844 { 1845 mp_watermark_t wm; 1846 mdname_t *np = NULL; 1847 mdsetname_t *spsetp = NULL; 1848 sp_ext_offset_t cur_off; 1849 md_set_desc *sd; 1850 int init = 0; 1851 mdkey_t key; 1852 minor_t mnum; 1853 1854 if (!metaislocalset(sp)) { 1855 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1856 return (-1); 1857 } 1858 1859 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1860 return (-1); 1861 1862 for (;;) { 1863 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1864 return (-1); 1865 } 1866 1867 /* get the set and name pointers */ 1868 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1869 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1870 return (-1); 1871 } 1872 } 1873 1874 /* 1875 * For the MN set, meta_init_make_device needs to 1876 * be run on all the nodes so the entries for the 1877 * softpart device name and its comp can be created 1878 * in the same order in the replica namespace. If 1879 * we have it run on mdmn_do_iocset then the mddbs 1880 * will be out of sync between master node and slave 1881 * nodes. 1882 */ 1883 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1884 1885 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1886 md_mn_msg_addmdname_t *send_params; 1887 int result; 1888 md_mn_result_t *resp = NULL; 1889 int message_size; 1890 1891 message_size = sizeof (*send_params) + 1892 strlen(wm.wm_mdname) + 1; 1893 send_params = Zalloc(message_size); 1894 send_params->addmdname_setno = sp->setno; 1895 (void) strcpy(&send_params->addmdname_name[0], 1896 wm.wm_mdname); 1897 result = mdmn_send_message(sp->setno, 1898 MD_MN_MSG_ADDMDNAME, 1899 MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, 1900 (char *)send_params, message_size, &resp, 1901 ep); 1902 Free(send_params); 1903 if (resp != NULL) { 1904 if (resp->mmr_exitval != 0) { 1905 free_result(resp); 1906 return (-1); 1907 } 1908 free_result(resp); 1909 } 1910 if (result != 0) 1911 return (-1); 1912 } else { 1913 1914 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1915 if ((key = meta_init_make_device(&sp, 1916 wm.wm_mdname, ep)) <= 0) { 1917 return (-1); 1918 } 1919 init = 1; 1920 } 1921 } 1922 1923 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1924 if (np == NULL) { 1925 if (init) { 1926 if (meta_getnmentbykey(sp->setno, 1927 MD_SIDEWILD, key, NULL, &mnum, 1928 NULL, ep) != NULL) { 1929 (void) metaioctl(MD_IOCREM_DEV, 1930 &mnum, ep, NULL); 1931 } 1932 (void) del_self_name(sp, key, ep); 1933 } 1934 return (-1); 1935 } 1936 } 1937 1938 /* insert watermark into extent list */ 1939 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1940 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1941 EXTFLG_UPDATE, compare); 1942 1943 /* if we see the end watermark, we're done */ 1944 if (wm.wm_type == EXTTYP_END) 1945 break; 1946 1947 cur_off += wm.wm_length + 1; 1948 1949 /* clear out set and name pointers for next iteration */ 1950 np = NULL; 1951 spsetp = NULL; 1952 } 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * ************************************************************************** 1959 * Print (metastat) Functions * 1960 * ************************************************************************** 1961 */ 1962 1963 /* 1964 * FUNCTION: meta_sp_short_print() 1965 * INPUT: msp - the unit structure to display 1966 * fp - the file pointer to send output to 1967 * options - print options from the command line processor 1968 * OUTPUT: ep - return error pointer 1969 * RETURNS: int - -1 if error, 0 on success 1970 * PURPOSE: display a short report of the soft partition in md.tab 1971 * form, primarily used for metastat -p. 1972 */ 1973 static int 1974 meta_sp_short_print( 1975 md_sp_t *msp, 1976 char *fname, 1977 FILE *fp, 1978 mdprtopts_t options, 1979 md_error_t *ep 1980 ) 1981 { 1982 int extn; 1983 1984 if (options & PRINT_LARGEDEVICES) { 1985 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1986 return (0); 1987 } 1988 1989 if (options & PRINT_FN) { 1990 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1991 return (0); 1992 } 1993 1994 /* print name and -p */ 1995 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1996 return (mdsyserror(ep, errno, fname)); 1997 1998 /* print the component */ 1999 /* 2000 * Always print the full path name 2001 */ 2002 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2003 return (mdsyserror(ep, errno, fname)); 2004 2005 /* print out each extent */ 2006 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2007 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2008 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2009 extp->len) == EOF) 2010 return (mdsyserror(ep, errno, fname)); 2011 } 2012 2013 if (fprintf(fp, "\n") == EOF) 2014 return (mdsyserror(ep, errno, fname)); 2015 2016 /* success */ 2017 return (0); 2018 } 2019 2020 /* 2021 * FUNCTION: meta_sp_status_to_name() 2022 * INPUT: xsp_status - the status value to convert to a string 2023 * tstate - transient errored device state. If set the 2024 * device is Unavailable 2025 * OUTPUT: none 2026 * RETURNS: char * - a pointer to the string representing the status value 2027 * PURPOSE: return an internationalized string representing the 2028 * status value for a soft partition. The strings are 2029 * strdup'd and must be freed by the caller. 2030 */ 2031 static char * 2032 meta_sp_status_to_name( 2033 xsp_status_t xsp_status, 2034 uint_t tstate 2035 ) 2036 { 2037 char *rval = NULL; 2038 2039 /* 2040 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 2041 * value for an 'Unavailable' return. tstate can be set because of 2042 * other multi-node reasons (e.g. ABR being set) 2043 */ 2044 if (tstate & MD_INACCESSIBLE) { 2045 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 2046 } 2047 2048 switch (xsp_status) { 2049 case MD_SP_CREATEPEND: 2050 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 2051 break; 2052 case MD_SP_GROWPEND: 2053 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 2054 break; 2055 case MD_SP_DELPEND: 2056 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 2057 break; 2058 case MD_SP_OK: 2059 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 2060 break; 2061 case MD_SP_ERR: 2062 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2063 break; 2064 case MD_SP_RECOVER: 2065 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2066 break; 2067 } 2068 2069 if (rval == NULL) 2070 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2071 2072 return (rval); 2073 } 2074 2075 /* 2076 * FUNCTION: meta_sp_report() 2077 * INPUT: sp - the set name for the unit being displayed 2078 * msp - the unit structure to display 2079 * nlpp - pass back the large devs 2080 * fp - the file pointer to send output to 2081 * options - print options from the command line processor 2082 * OUTPUT: ep - return error pointer 2083 * RETURNS: int - -1 if error, 0 on success 2084 * PURPOSE: print a full report of the device specified 2085 */ 2086 static int 2087 meta_sp_report( 2088 mdsetname_t *sp, 2089 md_sp_t *msp, 2090 mdnamelist_t **nlpp, 2091 char *fname, 2092 FILE *fp, 2093 mdprtopts_t options, 2094 md_error_t *ep 2095 ) 2096 { 2097 uint_t extn; 2098 char *status; 2099 char *devid = ""; 2100 mdname_t *didnp = NULL; 2101 ddi_devid_t dtp; 2102 int len; 2103 uint_t tstate = 0; 2104 2105 if (options & PRINT_LARGEDEVICES) { 2106 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) { 2107 return (0); 2108 } else { 2109 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2110 return (-1); 2111 } 2112 } 2113 2114 if (options & PRINT_FN) { 2115 if ((msp->common.revision & MD_FN_META_DEV) == 0) { 2116 return (0); 2117 } else { 2118 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2119 return (-1); 2120 } 2121 } 2122 2123 if (options & PRINT_HEADER) { 2124 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2125 msp->common.namep->cname) == EOF) 2126 return (mdsyserror(ep, errno, fname)); 2127 } 2128 2129 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2130 msp->compnamep->cname) == EOF) 2131 return (mdsyserror(ep, errno, fname)); 2132 2133 /* Determine if device is available before displaying status */ 2134 if (metaismeta(msp->common.namep)) { 2135 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2136 return (-1); 2137 } 2138 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2139 2140 /* print out "State" to be consistent with other metadevices */ 2141 if (tstate & MD_ABR_CAP) { 2142 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2143 " State: %s - Application Based Recovery (ABR)\n"), 2144 status) == EOF) { 2145 Free(status); 2146 return (mdsyserror(ep, errno, fname)); 2147 } 2148 } else { 2149 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2150 " State: %s\n"), status) == EOF) { 2151 Free(status); 2152 return (mdsyserror(ep, errno, fname)); 2153 } 2154 } 2155 free(status); 2156 2157 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2158 msp->common.size, 2159 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF) 2160 return (mdsyserror(ep, errno, fname)); 2161 2162 /* print component details */ 2163 if (! metaismeta(msp->compnamep)) { 2164 diskaddr_t start_blk; 2165 int has_mddb; 2166 char *has_mddb_str; 2167 2168 /* print header */ 2169 /* 2170 * Building a format string on the fly that will 2171 * be used in (f)printf. This allows the length 2172 * of the ctd to vary from small to large without 2173 * looking horrible. 2174 */ 2175 len = strlen(msp->compnamep->cname); 2176 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2177 len += 2; 2178 if (fprintf(fp, 2179 "\t%-*.*s %-12.12s %-5.5s %s\n", 2180 len, len, 2181 dgettext(TEXT_DOMAIN, "Device"), 2182 dgettext(TEXT_DOMAIN, "Start Block"), 2183 dgettext(TEXT_DOMAIN, "Dbase"), 2184 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2185 return (mdsyserror(ep, errno, fname)); 2186 } 2187 2188 2189 /* get info */ 2190 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2191 MD_DISKADDR_ERROR) 2192 return (-1); 2193 2194 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2195 return (-1); 2196 2197 if (has_mddb) 2198 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2199 else 2200 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2201 2202 /* populate the key in the name_p structure */ 2203 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2204 if (didnp == NULL) { 2205 return (-1); 2206 } 2207 2208 /* determine if devid does NOT exist */ 2209 if (options & PRINT_DEVID) { 2210 if ((dtp = meta_getdidbykey(sp->setno, 2211 getmyside(sp, ep), didnp->key, ep)) == NULL) 2212 devid = dgettext(TEXT_DOMAIN, "No "); 2213 else { 2214 devid = dgettext(TEXT_DOMAIN, "Yes"); 2215 free(dtp); 2216 } 2217 } 2218 2219 /* print info */ 2220 /* 2221 * This allows the length 2222 * of the ctd to vary from small to large without 2223 * looking horrible. 2224 */ 2225 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2226 len, msp->compnamep->cname, 2227 start_blk, has_mddb_str, devid) == EOF) { 2228 return (mdsyserror(ep, errno, fname)); 2229 } 2230 (void) fprintf(fp, "\n"); 2231 } 2232 2233 2234 /* print the headers */ 2235 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2236 dgettext(TEXT_DOMAIN, "Extent"), 2237 dgettext(TEXT_DOMAIN, "Start Block"), 2238 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2239 return (mdsyserror(ep, errno, fname)); 2240 2241 /* print out each extent */ 2242 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2243 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2244 2245 /* If PRINT_TIMES option is ever supported, add output here */ 2246 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2247 extn, extp->poff, extp->len) == EOF) 2248 return (mdsyserror(ep, errno, fname)); 2249 } 2250 2251 /* separate records with a newline */ 2252 (void) fprintf(fp, "\n"); 2253 return (0); 2254 } 2255 2256 /* 2257 * FUNCTION: meta_sp_print() 2258 * INPUT: sp - the set name for the unit being displayed 2259 * np - the name of the device to print 2260 * fname - ??? not used 2261 * fp - the file pointer to send output to 2262 * options - print options from the command line processor 2263 * OUTPUT: ep - return error pointer 2264 * RETURNS: int - -1 if error, 0 on success 2265 * PURPOSE: print a full report of the device specified by metastat. 2266 * This is the main entry point for printing. 2267 */ 2268 int 2269 meta_sp_print( 2270 mdsetname_t *sp, 2271 mdname_t *np, 2272 mdnamelist_t **nlpp, 2273 char *fname, 2274 FILE *fp, 2275 mdprtopts_t options, 2276 md_error_t *ep 2277 ) 2278 { 2279 md_sp_t *msp; 2280 md_unit_t *mdp; 2281 int rval = 0; 2282 2283 /* should always have the same set */ 2284 assert(sp != NULL); 2285 2286 /* print all the soft partitions */ 2287 if (np == NULL) { 2288 mdnamelist_t *nlp = NULL; 2289 mdnamelist_t *p; 2290 int cnt; 2291 2292 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2293 return (-1); 2294 else if (cnt == 0) 2295 return (0); 2296 2297 /* recusively print them out */ 2298 for (p = nlp; (p != NULL); p = p->next) { 2299 mdname_t *curnp = p->namep; 2300 2301 /* 2302 * one problem with the rval of -1 here is that 2303 * the error gets "lost" when the next device is 2304 * printed, but we want to print them all anyway. 2305 */ 2306 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2307 options, ep); 2308 } 2309 2310 /* clean up, return success */ 2311 metafreenamelist(nlp); 2312 return (rval); 2313 } 2314 2315 /* get the unit structure */ 2316 if ((msp = meta_get_sp_common(sp, np, 2317 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2318 return (-1); 2319 2320 /* check for parented */ 2321 if ((! (options & PRINT_SUBDEVS)) && 2322 (MD_HAS_PARENT(msp->common.parent))) { 2323 return (0); 2324 } 2325 2326 /* print appropriate detail */ 2327 if (options & PRINT_SHORT) { 2328 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2329 return (-1); 2330 } else { 2331 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2332 return (-1); 2333 } 2334 2335 /* 2336 * Print underlying metadevices if they are parented to us and 2337 * if the info for the underlying metadevice has not been printed. 2338 */ 2339 if (metaismeta(msp->compnamep)) { 2340 /* get the unit structure for the subdevice */ 2341 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2342 return (-1); 2343 2344 /* If info not already printed, recurse */ 2345 if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) { 2346 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2347 (options | PRINT_HEADER | PRINT_SUBDEVS), 2348 NULL, ep) != 0) { 2349 return (-1); 2350 } 2351 BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp))); 2352 } 2353 } 2354 return (0); 2355 } 2356 2357 /* 2358 * ************************************************************************** 2359 * Watermark Manipulation Functions * 2360 * ************************************************************************** 2361 */ 2362 2363 /* 2364 * FUNCTION: meta_sp_get_start() 2365 * INPUT: sp - the operating set 2366 * np - device upon which the sp is being built 2367 * OUTPUT: ep - return error pointer 2368 * RETURNS: daddr_t - -1 if error, otherwise the start block 2369 * PURPOSE: Encapsulate the determination of the start block of the 2370 * device upon which the sp is built or being built. 2371 */ 2372 static diskaddr_t 2373 meta_sp_get_start( 2374 mdsetname_t *sp, 2375 mdname_t *np, 2376 md_error_t *ep 2377 ) 2378 { 2379 daddr_t start_block; 2380 2381 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) 2382 start_block += MD_SP_START; 2383 2384 return (start_block); 2385 } 2386 2387 /* 2388 * FUNCTION: meta_sp_update_wm_common() 2389 * INPUT: sp - the operating set 2390 * msp - a pointer to the XDR unit structure 2391 * extlist - the extent list specifying watermarks to update 2392 * iocval - either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM 2393 * OUTPUT: ep - return error pointer 2394 * RETURNS: int - -1 if error, 0 on success 2395 * PURPOSE: steps backwards through the extent list updating 2396 * watermarks for all extents with the EXTFLG_UPDATE flag 2397 * set. Writing the watermarks guarantees consistency when 2398 * extents must be broken into pieces since the original 2399 * watermark will be the last to be updated, and will be 2400 * changed to point to a new watermark that is already 2401 * known to be consistent. If one of the writes fails, the 2402 * original watermark stays intact and none of the changes 2403 * are realized. 2404 */ 2405 static int 2406 meta_sp_update_wm_common( 2407 mdsetname_t *sp, 2408 md_sp_t *msp, 2409 sp_ext_node_t *extlist, 2410 int iocval, 2411 md_error_t *ep 2412 ) 2413 { 2414 sp_ext_node_t *ext; 2415 sp_ext_node_t *tail; 2416 mp_watermark_t *wmp, *watermarks; 2417 xsp_offset_t *osp, *offsets; 2418 int update_count = 0; 2419 int rval = 0; 2420 md_unit_t *mdp; 2421 md_sp_update_wm_t update_params; 2422 2423 if (getenv(META_SP_DEBUG)) { 2424 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2425 meta_sp_list_dump(extlist); 2426 } 2427 2428 /* 2429 * find the last node so we can write the watermarks backwards 2430 * and count watermarks to update so we can allocate space 2431 */ 2432 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2433 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2434 update_count++; 2435 } 2436 2437 if (ext->ext_next == NULL) { 2438 tail = ext; 2439 } 2440 } 2441 ext = tail; 2442 2443 wmp = watermarks = 2444 Zalloc(update_count * sizeof (mp_watermark_t)); 2445 osp = offsets = 2446 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2447 2448 while (ext != NULL) { 2449 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2450 /* update watermark */ 2451 wmp->wm_magic = MD_SP_MAGIC; 2452 wmp->wm_version = MD_SP_VERSION; 2453 wmp->wm_type = ext->ext_type; 2454 wmp->wm_seq = ext->ext_seq; 2455 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2456 2457 /* fill in the volume name and set name */ 2458 if (ext->ext_namep != NULL) 2459 (void) strcpy(wmp->wm_mdname, 2460 ext->ext_namep->cname); 2461 else 2462 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2463 if (ext->ext_setp != NULL && 2464 ext->ext_setp->setno != MD_LOCAL_SET) 2465 (void) strcpy(wmp->wm_setname, 2466 ext->ext_setp->setname); 2467 else 2468 (void) strcpy(wmp->wm_setname, 2469 MD_SP_LOCALSETNAME); 2470 2471 /* Generate the checksum */ 2472 wmp->wm_checksum = 0; 2473 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2474 sizeof (*wmp), NULL); 2475 2476 /* record the extent offset */ 2477 *osp = ext->ext_offset; 2478 2479 /* Advance the placeholders */ 2480 osp++; wmp++; 2481 } 2482 ext = ext->ext_prev; 2483 } 2484 2485 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2486 if (mdp == NULL) { 2487 rval = -1; 2488 goto out; 2489 } 2490 2491 (void) memset(&update_params, 0, sizeof (update_params)); 2492 update_params.mnum = MD_SID(mdp); 2493 update_params.count = update_count; 2494 update_params.wmp = (uintptr_t)watermarks; 2495 update_params.osp = (uintptr_t)offsets; 2496 MD_SETDRIVERNAME(&update_params, MD_SP, 2497 MD_MIN2SET(update_params.mnum)); 2498 2499 if (metaioctl(iocval, &update_params, &update_params.mde, 2500 msp->common.namep->cname) != 0) { 2501 (void) mdstealerror(ep, &update_params.mde); 2502 rval = -1; 2503 goto out; 2504 } 2505 2506 out: 2507 Free(watermarks); 2508 Free(offsets); 2509 2510 return (rval); 2511 } 2512 2513 static int 2514 meta_sp_update_wm( 2515 mdsetname_t *sp, 2516 md_sp_t *msp, 2517 sp_ext_node_t *extlist, 2518 md_error_t *ep 2519 ) 2520 { 2521 return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM, 2522 ep)); 2523 } 2524 2525 static int 2526 meta_mn_sp_update_wm( 2527 mdsetname_t *sp, 2528 md_sp_t *msp, 2529 sp_ext_node_t *extlist, 2530 md_error_t *ep 2531 ) 2532 { 2533 return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM, 2534 ep)); 2535 } 2536 2537 /* 2538 * FUNCTION: meta_sp_clear_wm() 2539 * INPUT: sp - the operating set 2540 * msp - the unit structure for the soft partition to clear 2541 * OUTPUT: ep - return error pointer 2542 * RETURNS: int - -1 if error, 0 on success 2543 * PURPOSE: steps through the extents for a soft partition unit and 2544 * creates an extent list designed to mark all of the 2545 * watermarks for those extents as free. The extent list 2546 * is then passed to meta_sp_update_wm() to actually write 2547 * the watermarks out. 2548 */ 2549 static int 2550 meta_sp_clear_wm( 2551 mdsetname_t *sp, 2552 md_sp_t *msp, 2553 md_error_t *ep 2554 ) 2555 { 2556 sp_ext_node_t *extlist = NULL; 2557 int numexts = msp->ext.ext_len; 2558 uint_t i; 2559 int rval = 0; 2560 2561 /* for each watermark must set the flag to SP_FREE */ 2562 for (i = 0; i < numexts; i++) { 2563 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2564 2565 meta_sp_list_insert(NULL, NULL, &extlist, 2566 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2567 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2568 } 2569 2570 /* update watermarks */ 2571 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2572 2573 meta_sp_list_free(&extlist); 2574 return (rval); 2575 } 2576 2577 /* 2578 * FUNCTION: meta_sp_read_wm() 2579 * INPUT: sp - setname for component 2580 * compnp - mdname_t for component 2581 * offset - the offset of the watermark to read (sectors) 2582 * OUTPUT: wm - the watermark structure to read into 2583 * ep - return error pointer 2584 * RETURNS: int - -1 if error, 0 on success 2585 * PURPOSE: seeks out to the requested offset and reads a watermark. 2586 * It then verifies that the magic number is correct and 2587 * that the checksum is valid, returning an error if either 2588 * is wrong. 2589 */ 2590 static int 2591 meta_sp_read_wm( 2592 mdsetname_t *sp, 2593 mdname_t *compnp, 2594 mp_watermark_t *wm, 2595 sp_ext_offset_t offset, 2596 md_error_t *ep 2597 ) 2598 { 2599 md_sp_read_wm_t read_params; 2600 2601 /* 2602 * make sure block offset does not overflow 2^64 bytes and it's a 2603 * multiple of the block size. 2604 */ 2605 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2606 /* LINTED */ 2607 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2608 2609 (void) memset(wm, 0, sizeof (*wm)); 2610 2611 (void) memset(&read_params, 0, sizeof (read_params)); 2612 read_params.rdev = compnp->dev; 2613 read_params.wmp = (uintptr_t)wm; 2614 read_params.offset = offset; 2615 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2616 2617 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2618 &read_params.mde, compnp->cname) != 0) { 2619 2620 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2621 "Extent header read failed, block %llu.\n"), offset); 2622 return (mdstealerror(ep, &read_params.mde)); 2623 } 2624 2625 /* make sure magic number is correct */ 2626 if (wm->wm_magic != MD_SP_MAGIC) { 2627 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2628 "found incorrect magic number %x, expected %x.\n"), 2629 wm->wm_magic, MD_SP_MAGIC); 2630 /* 2631 * Pass NULL for the device name as we don't have 2632 * valid watermark contents. 2633 */ 2634 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2635 } 2636 2637 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2638 sizeof (*wm), NULL)) { 2639 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2640 "found incorrect checksum %x.\n"), 2641 wm->wm_checksum); 2642 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2643 } 2644 2645 return (0); 2646 } 2647 2648 /* 2649 * ************************************************************************** 2650 * Query Functions 2651 * ************************************************************************** 2652 */ 2653 2654 /* 2655 * IMPORTANT NOTE: This is a static function that assumes that 2656 * its input parameters have been checked and 2657 * have valid values that lie within acceptable 2658 * ranges. 2659 * 2660 * FUNCTION: meta_sp_enough_space() 2661 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2662 * must be > 0 2663 * desired_sp_size - the desired soft partition size in blocks; 2664 * must be > 0 2665 * extent_listpp - a reference to a reference to an extent 2666 * list that lists the extents on a device; 2667 * must be a reference to a reference to a 2668 * valid extent list 2669 * alignment - the desired data space alignment for the sp's 2670 * OUTPUT: boolean_t return value 2671 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2672 * list to create the desired soft partitions, 2673 * B_FALSE if there's not enough space 2674 * PURPOSE: determines whether there's enough free space in an extent 2675 * list to allow creation of a set of soft partitions 2676 */ 2677 static boolean_t 2678 meta_sp_enough_space( 2679 int desired_number_of_sps, 2680 blkcnt_t desired_sp_size, 2681 sp_ext_node_t **extent_listpp, 2682 sp_ext_length_t alignment 2683 ) 2684 { 2685 boolean_t enough_space; 2686 int number_of_sps; 2687 int number_of_extents_used; 2688 sp_ext_length_t desired_ext_length = desired_sp_size; 2689 2690 enough_space = B_TRUE; 2691 number_of_sps = 0; 2692 while ((enough_space == B_TRUE) && 2693 (number_of_sps < desired_number_of_sps)) { 2694 /* 2695 * Use the extent allocation algorithm implemented by 2696 * meta_sp_alloc_by_len() to test whether the free 2697 * extents in the extent list referenced by *extent_listpp 2698 * contain enough space to accomodate a soft partition 2699 * of size desired_ext_length. 2700 * 2701 * Repeat the test <desired_number_of_sps> times 2702 * or until it fails, whichever comes first, 2703 * each time allocating the extents required to 2704 * create the soft partition without actually 2705 * creating the soft partition. 2706 */ 2707 number_of_extents_used = meta_sp_alloc_by_len( 2708 TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2709 extent_listpp, &desired_ext_length, 2710 NO_OFFSET, alignment); 2711 if (number_of_extents_used == -1) { 2712 enough_space = B_FALSE; 2713 } else { 2714 number_of_sps++; 2715 } 2716 } 2717 return (enough_space); 2718 } 2719 2720 /* 2721 * IMPORTANT NOTE: This is a static function that calls other functions 2722 * that check its mdsetnamep and device_mdnamep 2723 * input parameters, but expects extent_listpp to 2724 * be a initialized to a valid address to which 2725 * it can write a reference to the extent list that 2726 * it creates. 2727 * 2728 * FUNCTION: meta_sp_get_extent_list() 2729 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2730 * for the set containing the device for 2731 * which the extents are to be listed 2732 * device_mdnamep - a reference to the mdname_t structure 2733 * for the device for which the extents 2734 * are to be listed 2735 * OUTPUT: *extent_listpp - a reference to the extent list for 2736 * the device; NULL if the function fails 2737 * *ep - the libmeta error encountered, if any 2738 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2739 * B_FALSE if not 2740 * PURPOSE: gets the extent list for a device 2741 */ 2742 static boolean_t 2743 meta_sp_get_extent_list( 2744 mdsetname_t *mdsetnamep, 2745 mdname_t *device_mdnamep, 2746 sp_ext_node_t **extent_listpp, 2747 md_error_t *ep 2748 ) 2749 { 2750 diskaddr_t device_size_in_blocks; 2751 mdnamelist_t *sp_name_listp; 2752 diskaddr_t start_block_address_in_blocks; 2753 2754 *extent_listpp = NULL; 2755 sp_name_listp = NULL; 2756 2757 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2758 device_mdnamep, ep); 2759 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2760 if (getenv(META_SP_DEBUG)) { 2761 mde_perror(ep, 2762 "meta_sp_get_extent_list:meta_sp_get_start"); 2763 } 2764 return (B_FALSE); 2765 } 2766 2767 device_size_in_blocks = metagetsize(device_mdnamep, ep); 2768 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2769 if (getenv(META_SP_DEBUG)) { 2770 mde_perror(ep, 2771 "meta_sp_get_extent_list:metagetsize"); 2772 } 2773 return (B_FALSE); 2774 } 2775 2776 /* 2777 * Sanity check: the start block will have skipped an integer 2778 * number of cylinders, C. C will usually be zero. If (C > 0), 2779 * and the disk slice happens to only be C cylinders in total 2780 * size, we'll fail this check. 2781 */ 2782 if (device_size_in_blocks <= 2783 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2784 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2785 return (B_FALSE); 2786 } 2787 2788 /* 2789 * After this point, we will have allocated resources, so any 2790 * failure returns must be through the supplied "fail" label 2791 * to properly deallocate things. 2792 */ 2793 2794 /* 2795 * Create an empty extent list that starts one watermark past 2796 * the start block of the device and ends one watermark before 2797 * the end of the device. 2798 */ 2799 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2800 extent_listpp, NO_OFFSET, 2801 (sp_ext_length_t)start_block_address_in_blocks, 2802 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2803 meta_sp_cmp_by_offset); 2804 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2805 extent_listpp, (sp_ext_offset_t)(device_size_in_blocks - 2806 MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, 2807 NO_FLAGS, meta_sp_cmp_by_offset); 2808 2809 /* 2810 * Get the list of soft partitions that are already on the 2811 * device. 2812 */ 2813 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2814 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2815 if (getenv(META_SP_DEBUG)) { 2816 mde_perror(ep, 2817 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2818 } 2819 goto fail; 2820 } 2821 2822 if (sp_name_listp != NULL) { 2823 /* 2824 * If there are soft partitions on the device, add the 2825 * extents used in them to the extent list. 2826 */ 2827 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2828 extent_listpp, ep) == -1) { 2829 if (getenv(META_SP_DEBUG)) { 2830 mde_perror(ep, "meta_sp_get_extent_list:" 2831 "meta_sp_extlist_from_namelist"); 2832 } 2833 goto fail; 2834 } 2835 metafreenamelist(sp_name_listp); 2836 } 2837 2838 /* 2839 * Add free extents to the extent list to represent 2840 * the remaining regions of free space on the 2841 * device. 2842 */ 2843 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2844 return (B_TRUE); 2845 2846 fail: 2847 if (sp_name_listp != NULL) { 2848 metafreenamelist(sp_name_listp); 2849 } 2850 2851 if (*extent_listpp != NULL) { 2852 /* 2853 * meta_sp_list_free sets *extent_listpp to NULL. 2854 */ 2855 meta_sp_list_free(extent_listpp); 2856 } 2857 return (B_FALSE); 2858 } 2859 2860 /* 2861 * IMPORTANT NOTE: This is a static function that calls other functions 2862 * that check its mdsetnamep and mddrivenamep 2863 * input parameters, but expects extent_listpp to 2864 * be a initialized to a valid address to which 2865 * it can write a reference to the extent list that 2866 * it creates. 2867 * 2868 * FUNCTION: meta_sp_get_extent_list_for_drive() 2869 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2870 * for the set containing the drive for 2871 * which the extents are to be listed 2872 * mddrivenamep - a reference to the mddrivename_t structure 2873 * for the drive for which the extents 2874 * are to be listed 2875 * OUTPUT: *extent_listpp - a reference to the extent list for 2876 * the drive; NULL if the function fails 2877 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2878 * B_FALSE if not 2879 * PURPOSE: gets the extent list for a drive when the entire drive 2880 * is to be soft partitioned 2881 */ 2882 static boolean_t 2883 meta_sp_get_extent_list_for_drive( 2884 mdsetname_t *mdsetnamep, 2885 mddrivename_t *mddrivenamep, 2886 sp_ext_node_t **extent_listpp 2887 ) 2888 { 2889 boolean_t can_use; 2890 diskaddr_t free_space; 2891 md_error_t mderror; 2892 mdvtoc_t proposed_vtoc; 2893 int repartition_options; 2894 int return_value; 2895 md_sp_t test_sp_struct; 2896 2897 can_use = B_TRUE; 2898 *extent_listpp = NULL; 2899 mderror = mdnullerror; 2900 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2901 &mderror); 2902 if (test_sp_struct.compnamep == NULL) { 2903 can_use = B_FALSE; 2904 } 2905 2906 if (can_use == B_TRUE) { 2907 mderror = mdnullerror; 2908 repartition_options = 0; 2909 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2910 MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror); 2911 if (return_value != 0) { 2912 can_use = B_FALSE; 2913 } 2914 } 2915 2916 if (can_use == B_TRUE) { 2917 mderror = mdnullerror; 2918 repartition_options = repartition_options | 2919 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2920 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2921 repartition_options, &proposed_vtoc, &mderror); 2922 if (return_value != 0) { 2923 can_use = B_FALSE; 2924 } 2925 } 2926 2927 if (can_use == B_TRUE) { 2928 free_space = proposed_vtoc.parts[MD_SLICE0].size; 2929 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2930 can_use = B_FALSE; 2931 } 2932 } 2933 2934 if (can_use == B_TRUE) { 2935 /* 2936 * Create an extent list that starts with 2937 * a reserved extent that ends at the start 2938 * of the usable space on slice zero of the 2939 * proposed VTOC, ends with an extent that 2940 * reserves space for a watermark at the end 2941 * of slice zero, and contains a single free 2942 * extent that occupies the rest of the space 2943 * on the slice. 2944 * 2945 * NOTE: 2946 * 2947 * Don't use metagetstart() or metagetsize() to 2948 * find the usable space. They query the mdname_t 2949 * structure that represents an actual device to 2950 * determine the amount of space on the device that 2951 * contains metadata and the total amount of space 2952 * on the device. Since this function creates a 2953 * proposed extent list that doesn't reflect the 2954 * state of an actual device, there's no mdname_t 2955 * structure to be queried. 2956 * 2957 * When a drive is reformatted to prepare for 2958 * soft partitioning, all of slice seven is 2959 * reserved for metadata, all of slice zero is 2960 * available for soft partitioning, and all other 2961 * slices on the drive are empty. The proposed 2962 * extent list for the drive therefore contains 2963 * only three extents: a reserved extent that ends 2964 * at the start of the usable space on slice zero, 2965 * a single free extent that occupies all the usable 2966 * space on slice zero, and an ending extent that 2967 * reserves space for a watermark at the end of 2968 * slice zero. 2969 */ 2970 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2971 extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START), 2972 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2973 meta_sp_cmp_by_offset); 2974 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2975 extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2976 MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS, 2977 meta_sp_cmp_by_offset); 2978 meta_sp_list_freefill(extent_listpp, free_space); 2979 } 2980 return (can_use); 2981 } 2982 2983 /* 2984 * FUNCTION: meta_sp_can_create_sps() 2985 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2986 * for the set containing the device for 2987 * which the extents are to be listed 2988 * mdnamep - a reference to the mdname_t of the device 2989 * on which the soft parititions are to be created 2990 * number_of_sps - the desired number of soft partitions 2991 * sp_size - the desired soft partition size 2992 * OUTPUT: boolean_t return value 2993 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 2994 * B_FALSE if not 2995 * PURPOSE: determines whether a set of soft partitions can be created 2996 * on a device 2997 */ 2998 boolean_t 2999 meta_sp_can_create_sps( 3000 mdsetname_t *mdsetnamep, 3001 mdname_t *mdnamep, 3002 int number_of_sps, 3003 blkcnt_t sp_size 3004 ) 3005 { 3006 sp_ext_node_t *extent_listp; 3007 boolean_t succeeded; 3008 md_error_t mde; 3009 3010 if ((number_of_sps > 0) && (sp_size > 0)) { 3011 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3012 &extent_listp, &mde); 3013 } else { 3014 succeeded = B_FALSE; 3015 } 3016 3017 /* 3018 * We don't really care about an error return from the 3019 * alignment call; that will just result in passing zero, 3020 * which will be interpreted as no alignment. 3021 */ 3022 3023 if (succeeded == B_TRUE) { 3024 succeeded = meta_sp_enough_space(number_of_sps, 3025 sp_size, &extent_listp, 3026 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3027 meta_sp_list_free(&extent_listp); 3028 } 3029 return (succeeded); 3030 } 3031 3032 /* 3033 * FUNCTION: meta_sp_can_create_sps_on_drive() 3034 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3035 * for the set containing the drive for 3036 * which the extents are to be listed 3037 * mddrivenamep - a reference to the mddrivename_t of the drive 3038 * on which the soft parititions are to be created 3039 * number_of_sps - the desired number of soft partitions 3040 * sp_size - the desired soft partition size 3041 * OUTPUT: boolean_t return value 3042 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3043 * B_FALSE if not 3044 * PURPOSE: determines whether a set of soft partitions can be created 3045 * on a drive if the entire drive is soft partitioned 3046 */ 3047 boolean_t 3048 meta_sp_can_create_sps_on_drive( 3049 mdsetname_t *mdsetnamep, 3050 mddrivename_t *mddrivenamep, 3051 int number_of_sps, 3052 blkcnt_t sp_size 3053 ) 3054 { 3055 sp_ext_node_t *extent_listp; 3056 boolean_t succeeded; 3057 3058 if ((number_of_sps > 0) && (sp_size > 0)) { 3059 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3060 mddrivenamep, &extent_listp); 3061 } else { 3062 succeeded = B_FALSE; 3063 } 3064 3065 /* 3066 * We don't care about alignment on the space call because 3067 * we're specifically dealing with a drive, which will have no 3068 * inherent alignment. 3069 */ 3070 3071 if (succeeded == B_TRUE) { 3072 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3073 &extent_listp, SP_UNALIGNED); 3074 meta_sp_list_free(&extent_listp); 3075 } 3076 return (succeeded); 3077 } 3078 3079 /* 3080 * FUNCTION: meta_sp_get_free_space() 3081 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3082 * for the set containing the device for 3083 * which the free space is to be returned 3084 * mdnamep - a reference to the mdname_t of the device 3085 * for which the free space is to be returned 3086 * OUTPUT: blkcnt_t return value 3087 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3088 * PURPOSE: returns the number of blocks of free space on a device 3089 */ 3090 blkcnt_t 3091 meta_sp_get_free_space( 3092 mdsetname_t *mdsetnamep, 3093 mdname_t *mdnamep 3094 ) 3095 { 3096 sp_ext_node_t *extent_listp; 3097 sp_ext_length_t free_blocks; 3098 boolean_t succeeded; 3099 md_error_t mde; 3100 3101 extent_listp = NULL; 3102 free_blocks = 0; 3103 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3104 &extent_listp, &mde); 3105 if (succeeded == B_TRUE) { 3106 free_blocks = meta_sp_list_size(extent_listp, 3107 EXTTYP_FREE, INCLUDE_WM); 3108 meta_sp_list_free(&extent_listp); 3109 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3110 /* 3111 * Subtract a safety margin for watermarks when 3112 * computing the number of blocks available for 3113 * use. The actual number of watermarks can't 3114 * be calculated without knowing the exact numbers 3115 * and sizes of both the free extents and the soft 3116 * partitions to be created. The calculation is 3117 * highly complex and error-prone even if those 3118 * quantities are known. The approximate value 3119 * 10 * MD_SP_WMSIZE is within a few blocks of the 3120 * correct value in all practical cases. 3121 */ 3122 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3123 } else { 3124 free_blocks = 0; 3125 } 3126 } else { 3127 mdclrerror(&mde); 3128 } 3129 3130 return (free_blocks); 3131 } 3132 3133 /* 3134 * FUNCTION: meta_sp_get_free_space_on_drive() 3135 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3136 * for the set containing the drive for 3137 * which the free space is to be returned 3138 * mddrivenamep - a reference to the mddrivename_t of the drive 3139 * for which the free space is to be returned 3140 * OUTPUT: blkcnt_t return value 3141 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3142 * PURPOSE: returns the number of blocks of space usable for soft 3143 * partitions on an entire drive, if the entire drive is 3144 * soft partitioned 3145 */ 3146 blkcnt_t 3147 meta_sp_get_free_space_on_drive( 3148 mdsetname_t *mdsetnamep, 3149 mddrivename_t *mddrivenamep 3150 ) 3151 { 3152 sp_ext_node_t *extent_listp; 3153 sp_ext_length_t free_blocks; 3154 boolean_t succeeded; 3155 3156 extent_listp = NULL; 3157 free_blocks = 0; 3158 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3159 mddrivenamep, &extent_listp); 3160 if (succeeded == B_TRUE) { 3161 free_blocks = meta_sp_list_size(extent_listp, 3162 EXTTYP_FREE, INCLUDE_WM); 3163 meta_sp_list_free(&extent_listp); 3164 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3165 /* 3166 * Subtract a safety margin for watermarks when 3167 * computing the number of blocks available for 3168 * use. The actual number of watermarks can't 3169 * be calculated without knowing the exact numbers 3170 * and sizes of both the free extents and the soft 3171 * partitions to be created. The calculation is 3172 * highly complex and error-prone even if those 3173 * quantities are known. The approximate value 3174 * 10 * MD_SP_WMSIZE is within a few blocks of the 3175 * correct value in all practical cases. 3176 */ 3177 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3178 } else { 3179 free_blocks = 0; 3180 } 3181 } 3182 return (free_blocks); 3183 } 3184 3185 /* 3186 * FUNCTION: meta_sp_get_number_of_possible_sps() 3187 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3188 * for the set containing the device for 3189 * which the number of possible soft partitions 3190 * is to be returned 3191 * mdnamep - a reference to the mdname_t of the device 3192 * for which the number of possible soft partitions 3193 * is to be returned 3194 * OUTPUT: int return value 3195 * RETURNS: int - the number of soft partitions of the desired size 3196 * that can be created on the device 3197 * PURPOSE: returns the number of soft partitions of a given size 3198 * that can be created on a device 3199 */ 3200 int 3201 meta_sp_get_number_of_possible_sps( 3202 mdsetname_t *mdsetnamep, 3203 mdname_t *mdnamep, 3204 blkcnt_t sp_size 3205 ) 3206 { 3207 sp_ext_node_t *extent_listp; 3208 int number_of_possible_sps; 3209 boolean_t succeeded; 3210 md_error_t mde; 3211 sp_ext_length_t alignment; 3212 3213 extent_listp = NULL; 3214 number_of_possible_sps = 0; 3215 if (sp_size > 0) { 3216 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3217 mdnamep, &extent_listp, &mde)) == B_FALSE) 3218 mdclrerror(&mde); 3219 } else { 3220 succeeded = B_FALSE; 3221 } 3222 3223 if (succeeded == B_TRUE) { 3224 alignment = meta_sp_get_default_alignment(mdsetnamep, 3225 mdnamep, &mde); 3226 } 3227 3228 while (succeeded == B_TRUE) { 3229 /* 3230 * Keep allocating space from the extent list 3231 * for soft partitions of the desired size until 3232 * there's not enough free space left in the list 3233 * for another soft partiition of that size. 3234 * Add one to the number of possible soft partitions 3235 * for each soft partition for which there is 3236 * enough free space left. 3237 */ 3238 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3239 sp_size, &extent_listp, alignment); 3240 if (succeeded == B_TRUE) { 3241 number_of_possible_sps++; 3242 } 3243 } 3244 if (extent_listp != NULL) { 3245 meta_sp_list_free(&extent_listp); 3246 } 3247 return (number_of_possible_sps); 3248 } 3249 3250 /* 3251 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3252 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3253 * for the set containing the drive for 3254 * which the number of possible soft partitions 3255 * is to be returned 3256 * mddrivenamep - a reference to the mddrivename_t of the drive 3257 * for which the number of possible soft partitions 3258 * is to be returned 3259 * sp_size - the size in blocks of the proposed soft partitions 3260 * OUTPUT: int return value 3261 * RETURNS: int - the number of soft partitions of the desired size 3262 * that can be created on the drive 3263 * PURPOSE: returns the number of soft partitions of a given size 3264 * that can be created on a drive, if the entire drive is 3265 * soft partitioned 3266 */ 3267 int 3268 meta_sp_get_number_of_possible_sps_on_drive( 3269 mdsetname_t *mdsetnamep, 3270 mddrivename_t *mddrivenamep, 3271 blkcnt_t sp_size 3272 ) 3273 { 3274 sp_ext_node_t *extent_listp; 3275 int number_of_possible_sps; 3276 boolean_t succeeded; 3277 3278 extent_listp = NULL; 3279 number_of_possible_sps = 0; 3280 if (sp_size > 0) { 3281 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3282 mddrivenamep, &extent_listp); 3283 } else { 3284 succeeded = B_FALSE; 3285 } 3286 while (succeeded == B_TRUE) { 3287 /* 3288 * Keep allocating space from the extent list 3289 * for soft partitions of the desired size until 3290 * there's not enough free space left in the list 3291 * for another soft partition of that size. 3292 * Add one to the number of possible soft partitions 3293 * for each soft partition for which there is 3294 * enough free space left. 3295 * 3296 * Since it's a drive, not a metadevice, make no 3297 * assumptions about alignment. 3298 */ 3299 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3300 sp_size, &extent_listp, SP_UNALIGNED); 3301 if (succeeded == B_TRUE) { 3302 number_of_possible_sps++; 3303 } 3304 } 3305 if (extent_listp != NULL) { 3306 meta_sp_list_free(&extent_listp); 3307 } 3308 return (number_of_possible_sps); 3309 } 3310 3311 /* 3312 * FUNCTION: meta_sp_get_possible_sp_size() 3313 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3314 * for the set containing the device for 3315 * which the possible soft partition size 3316 * is to be returned 3317 * mdnamep - a reference to the mdname_t of the device 3318 * for which the possible soft partition size 3319 * is to be returned 3320 * number_of_sps - the desired number of soft partitions 3321 * OUTPUT: blkcnt_t return value 3322 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3323 * PURPOSE: returns the maximum possible size of each of a given number of 3324 * soft partitions of equal size that can be created on a device 3325 */ 3326 blkcnt_t 3327 meta_sp_get_possible_sp_size( 3328 mdsetname_t *mdsetnamep, 3329 mdname_t *mdnamep, 3330 int number_of_sps 3331 ) 3332 { 3333 blkcnt_t free_blocks; 3334 blkcnt_t sp_size; 3335 boolean_t succeeded; 3336 3337 sp_size = 0; 3338 if (number_of_sps > 0) { 3339 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3340 sp_size = free_blocks / number_of_sps; 3341 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3342 number_of_sps, sp_size); 3343 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3344 /* 3345 * To compensate for space that may have been 3346 * occupied by watermarks, reduce sp_size by a 3347 * number of blocks equal to the number of soft 3348 * partitions desired, and test again to see 3349 * whether the desired number of soft partitions 3350 * can be created. 3351 */ 3352 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3353 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3354 number_of_sps, sp_size); 3355 } 3356 if (sp_size < 0) { 3357 sp_size = 0; 3358 } 3359 } 3360 return (sp_size); 3361 } 3362 3363 /* 3364 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3365 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3366 * for the set containing the drive for 3367 * which the possible soft partition size 3368 * is to be returned 3369 * mddrivenamep - a reference to the mddrivename_t of the drive 3370 * for which the possible soft partition size 3371 * is to be returned 3372 * number_of_sps - the desired number of soft partitions 3373 * OUTPUT: blkcnt_t return value 3374 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3375 * PURPOSE: returns the maximum possible size of each of a given number of 3376 * soft partitions of equal size that can be created on a drive 3377 * if the entire drive is soft partitioned 3378 */ 3379 blkcnt_t 3380 meta_sp_get_possible_sp_size_on_drive( 3381 mdsetname_t *mdsetnamep, 3382 mddrivename_t *mddrivenamep, 3383 int number_of_sps 3384 ) 3385 { 3386 blkcnt_t free_blocks; 3387 blkcnt_t sp_size; 3388 boolean_t succeeded; 3389 3390 sp_size = 0; 3391 if (number_of_sps > 0) { 3392 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3393 mddrivenamep); 3394 sp_size = free_blocks / number_of_sps; 3395 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3396 mddrivenamep, number_of_sps, sp_size); 3397 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3398 /* 3399 * To compensate for space that may have been 3400 * occupied by watermarks, reduce sp_size by a 3401 * number of blocks equal to the number of soft 3402 * partitions desired, and test again to see 3403 * whether the desired number of soft partitions 3404 * can be created. 3405 */ 3406 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3407 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3408 mddrivenamep, number_of_sps, sp_size); 3409 } 3410 if (sp_size < 0) { 3411 sp_size = 0; 3412 } 3413 } 3414 return (sp_size); 3415 } 3416 3417 /* 3418 * ************************************************************************** 3419 * Unit Structure Manipulation Functions * 3420 * ************************************************************************** 3421 */ 3422 3423 /* 3424 * FUNCTION: meta_sp_fillextarray() 3425 * INPUT: mp - the unit structure to fill 3426 * extlist - the list of extents to fill with 3427 * OUTPUT: none 3428 * RETURNS: void 3429 * PURPOSE: fills in the unit structure extent list with the extents 3430 * specified by extlist. Only extents in extlist with the 3431 * EXTFLG_UPDATE flag are changed in the unit structure, 3432 * and the index into the unit structure is the sequence 3433 * number in the extent list. After all of the nodes have 3434 * been updated the virtual offsets in the unit structure 3435 * are updated to reflect the new lengths. 3436 */ 3437 static void 3438 meta_sp_fillextarray( 3439 mp_unit_t *mp, 3440 sp_ext_node_t *extlist 3441 ) 3442 { 3443 int i; 3444 sp_ext_node_t *ext; 3445 sp_ext_offset_t curvoff = 0LL; 3446 3447 assert(mp != NULL); 3448 3449 /* go through the allocation list and fill in our unit structure */ 3450 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3451 if ((ext->ext_type == EXTTYP_ALLOC) && 3452 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3453 mp->un_ext[ext->ext_seq].un_poff = 3454 ext->ext_offset + MD_SP_WMSIZE; 3455 mp->un_ext[ext->ext_seq].un_len = 3456 ext->ext_length - MD_SP_WMSIZE; 3457 } 3458 } 3459 3460 for (i = 0; i < mp->un_numexts; i++) { 3461 assert(mp->un_ext[i].un_poff != 0); 3462 assert(mp->un_ext[i].un_len != 0); 3463 mp->un_ext[i].un_voff = curvoff; 3464 curvoff += mp->un_ext[i].un_len; 3465 } 3466 } 3467 3468 /* 3469 * FUNCTION: meta_sp_createunit() 3470 * INPUT: np - the name of the device to create a unit structure for 3471 * compnp - the name of the device the soft partition is on 3472 * extlist - the extent list to populate the new unit with 3473 * numexts - the number of extents in the extent list 3474 * len - the total size of the soft partition (sectors) 3475 * status - the initial status of the unit structure 3476 * OUTPUT: ep - return error pointer 3477 * RETURNS: mp_unit_t * - the new unit structure. 3478 * PURPOSE: allocates and fills in a new soft partition unit 3479 * structure to be passed to the soft partitioning driver 3480 * for creation. 3481 */ 3482 static mp_unit_t * 3483 meta_sp_createunit( 3484 mdname_t *np, 3485 mdname_t *compnp, 3486 sp_ext_node_t *extlist, 3487 int numexts, 3488 sp_ext_length_t len, 3489 sp_status_t status, 3490 md_error_t *ep 3491 ) 3492 { 3493 mp_unit_t *mp; 3494 uint_t ms_size; 3495 3496 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3497 (numexts * sizeof (mp->un_ext[0])); 3498 3499 mp = Zalloc(ms_size); 3500 3501 /* fill in fields in common unit structure */ 3502 mp->c.un_type = MD_METASP; 3503 mp->c.un_size = ms_size; 3504 MD_SID(mp) = meta_getminor(np->dev); 3505 mp->c.un_total_blocks = len; 3506 mp->c.un_actual_tb = len; 3507 3508 /* set up geometry */ 3509 (void) meta_sp_setgeom(np, compnp, mp, ep); 3510 3511 /* if we're building on metadevice we can't parent */ 3512 if (metaismeta(compnp)) 3513 MD_CAPAB(mp) = MD_CANT_PARENT; 3514 else 3515 MD_CAPAB(mp) = MD_CAN_PARENT; 3516 3517 /* fill soft partition-specific fields */ 3518 mp->un_dev = compnp->dev; 3519 mp->un_key = compnp->key; 3520 3521 /* mdname_t start_blk field is not 64-bit! */ 3522 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3523 mp->un_status = status; 3524 mp->un_numexts = numexts; 3525 mp->un_length = len; 3526 3527 /* fill in the extent array */ 3528 meta_sp_fillextarray(mp, extlist); 3529 3530 return (mp); 3531 } 3532 3533 /* 3534 * FUNCTION: meta_sp_updateunit() 3535 * INPUT: np - name structure for the metadevice being updated 3536 * old_un - the original unit structure that is being updated 3537 * extlist - the extent list to populate the new unit with 3538 * grow_len - the amount by which the partition is being grown 3539 * numexts - the number of extents in the extent list 3540 * ep - return error pointer 3541 * OUTPUT: none 3542 * RETURNS: mp_unit_t * - the updated unit structure 3543 * PURPOSE: allocates and fills in a new soft partition unit structure to 3544 * be passed to the soft partitioning driver for creation. The 3545 * old unit structure is first copied in, and then the updated 3546 * extents are changed in the new unit structure. This is 3547 * typically used when the size of an existing unit is changed. 3548 */ 3549 static mp_unit_t * 3550 meta_sp_updateunit( 3551 mdname_t *np, 3552 mp_unit_t *old_un, 3553 sp_ext_node_t *extlist, 3554 sp_ext_length_t grow_len, 3555 int numexts, 3556 md_error_t *ep 3557 ) 3558 { 3559 mp_unit_t *new_un; 3560 sp_ext_length_t new_len; 3561 uint_t new_size; 3562 3563 assert(old_un != NULL); 3564 assert(extlist != NULL); 3565 3566 /* allocate new unit structure and copy in old unit */ 3567 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3568 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3569 new_len = old_un->un_length + grow_len; 3570 new_un = Zalloc(new_size); 3571 bcopy(old_un, new_un, old_un->c.un_size); 3572 3573 /* update size and geometry information */ 3574 new_un->c.un_size = new_size; 3575 new_un->un_length = new_len; 3576 new_un->c.un_total_blocks = new_len; 3577 new_un->c.un_actual_tb = new_len; 3578 if (meta_adjust_geom((md_unit_t *)new_un, np, 3579 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3580 0, ep) != 0) { 3581 Free(new_un); 3582 return (NULL); 3583 } 3584 3585 /* update extent information */ 3586 new_un->un_numexts += numexts; 3587 3588 meta_sp_fillextarray(new_un, extlist); 3589 3590 return (new_un); 3591 } 3592 3593 /* 3594 * FUNCTION: meta_get_sp() 3595 * INPUT: sp - the set name for the device to get 3596 * np - the name of the device to get 3597 * OUTPUT: ep - return error pointer 3598 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3599 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3600 * for the named device. Just a wrapper for meta_get_sp_common(). 3601 */ 3602 md_sp_t * 3603 meta_get_sp( 3604 mdsetname_t *sp, 3605 mdname_t *np, 3606 md_error_t *ep 3607 ) 3608 { 3609 return (meta_get_sp_common(sp, np, 0, ep)); 3610 } 3611 3612 /* 3613 * FUNCTION: meta_get_sp_common() 3614 * INPUT: sp - the set name for the device to get 3615 * np - the name of the device to get 3616 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3617 * OUTPUT: ep - return error pointer 3618 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3619 * NULL if np is not a soft partition 3620 * PURPOSE: common routine for fetching a soft partition unit structure 3621 */ 3622 md_sp_t * 3623 meta_get_sp_common( 3624 mdsetname_t *sp, 3625 mdname_t *np, 3626 int fast, 3627 md_error_t *ep 3628 ) 3629 { 3630 mddrivename_t *dnp = np->drivenamep; 3631 char *miscname; 3632 mp_unit_t *mp; 3633 md_sp_t *msp; 3634 int i; 3635 3636 /* must have set */ 3637 assert(sp != NULL); 3638 3639 /* short circuit */ 3640 if (dnp->unitp != NULL) { 3641 if (dnp->unitp->type != MD_METASP) 3642 return (NULL); 3643 return ((md_sp_t *)dnp->unitp); 3644 } 3645 /* get miscname and unit */ 3646 if ((miscname = metagetmiscname(np, ep)) == NULL) 3647 return (NULL); 3648 3649 if (strcmp(miscname, MD_SP) != 0) { 3650 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3651 return (NULL); 3652 } 3653 3654 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3655 return (NULL); 3656 3657 assert(mp->c.un_type == MD_METASP); 3658 3659 /* allocate soft partition */ 3660 msp = Zalloc(sizeof (*msp)); 3661 3662 /* get the common information */ 3663 msp->common.namep = np; 3664 msp->common.type = mp->c.un_type; 3665 msp->common.state = mp->c.un_status; 3666 msp->common.capabilities = mp->c.un_capabilities; 3667 msp->common.parent = mp->c.un_parent; 3668 msp->common.size = mp->c.un_total_blocks; 3669 msp->common.user_flags = mp->c.un_user_flags; 3670 msp->common.revision = mp->c.un_revision; 3671 3672 /* get soft partition information */ 3673 if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3674 goto out; 3675 3676 /* 3677 * Fill in the key and the start block. Note that the start 3678 * block in the unit structure is 64 bits but the name pointer 3679 * only supports 32 bits. 3680 */ 3681 msp->compnamep->key = mp->un_key; 3682 msp->compnamep->start_blk = mp->un_start_blk; 3683 3684 /* fill in status field */ 3685 msp->status = mp->un_status; 3686 3687 /* allocate the extents */ 3688 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3689 msp->ext.ext_len = mp->un_numexts; 3690 3691 /* do the extents for this soft partition */ 3692 for (i = 0; i < mp->un_numexts; i++) { 3693 struct mp_ext *mde = &mp->un_ext[i]; 3694 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3695 3696 extp->voff = mde->un_voff; 3697 extp->poff = mde->un_poff; 3698 extp->len = mde->un_len; 3699 } 3700 3701 /* cleanup, return success */ 3702 Free(mp); 3703 dnp->unitp = (md_common_t *)msp; 3704 return (msp); 3705 3706 out: 3707 /* clean up and return error */ 3708 Free(mp); 3709 Free(msp); 3710 return (NULL); 3711 } 3712 3713 3714 /* 3715 * FUNCTION: meta_init_sp() 3716 * INPUT: spp - the set name for the new device 3717 * argc - the remaining argument count for the metainit cmdline 3718 * argv - the remainder of the unparsed command line 3719 * options - global options parsed by metainit 3720 * OUTPUT: ep - return error pointer 3721 * RETURNS: int - -1 failure, 0 success 3722 * PURPOSE: provides the command line parsing and name management overhead 3723 * for creating a new soft partition. Ultimately this calls 3724 * meta_create_sp() which does the real work of allocating space 3725 * for the new soft partition. 3726 */ 3727 int 3728 meta_init_sp( 3729 mdsetname_t **spp, 3730 int argc, 3731 char *argv[], 3732 mdcmdopts_t options, 3733 md_error_t *ep 3734 ) 3735 { 3736 char *compname = NULL; 3737 mdname_t *spcompnp = NULL; /* name of component volume */ 3738 char *devname = argv[0]; /* unit name */ 3739 mdname_t *np = NULL; /* name of soft partition */ 3740 md_sp_t *msp = NULL; 3741 int c; 3742 int old_optind; 3743 sp_ext_length_t len = 0LL; 3744 int rval = -1; 3745 uint_t seq; 3746 int oflag; 3747 int failed; 3748 mddrivename_t *dnp = NULL; 3749 sp_ext_length_t alignment = 0LL; 3750 sp_ext_node_t *extlist = NULL; 3751 3752 assert(argc > 0); 3753 3754 /* expect sp name, -p, optional -e, compname, and size parameters */ 3755 /* grab soft partition name */ 3756 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3757 goto out; 3758 3759 /* see if it exists already */ 3760 if (metagetmiscname(np, ep) != NULL) { 3761 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3762 meta_getminor(np->dev), devname); 3763 goto out; 3764 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3765 goto out; 3766 } else { 3767 mdclrerror(ep); 3768 } 3769 --argc, ++argv; 3770 3771 if (argc == 0) 3772 goto syntax; 3773 3774 /* grab -p */ 3775 if (strcmp(argv[0], "-p") != 0) 3776 goto syntax; 3777 --argc, ++argv; 3778 3779 if (argc == 0) 3780 goto syntax; 3781 3782 /* see if -e is there */ 3783 if (strcmp(argv[0], "-e") == 0) { 3784 /* use the whole disk */ 3785 options |= MDCMD_USE_WHOLE_DISK; 3786 --argc, ++argv; 3787 } 3788 3789 if (argc == 0) 3790 goto syntax; 3791 3792 /* get component name */ 3793 compname = Strdup(argv[0]); 3794 3795 if (options & MDCMD_USE_WHOLE_DISK) { 3796 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3797 goto out; 3798 } 3799 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3800 goto out; 3801 } 3802 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3803 goto out; 3804 } 3805 assert(*spp != NULL); 3806 3807 if (!(options & MDCMD_NOLOCK)) { 3808 /* grab set lock */ 3809 if (meta_lock(*spp, TRUE, ep)) 3810 goto out; 3811 3812 if (meta_check_ownership(*spp, ep) != 0) 3813 goto out; 3814 } 3815 3816 /* allocate the soft partition */ 3817 msp = Zalloc(sizeof (*msp)); 3818 3819 /* setup common */ 3820 msp->common.namep = np; 3821 msp->common.type = MD_METASP; 3822 3823 compname = spcompnp->cname; 3824 3825 assert(spcompnp->rname != NULL); 3826 --argc, ++argv; 3827 3828 if (argc == 0) { 3829 goto syntax; 3830 } 3831 3832 if (*argv[0] == '-') { 3833 /* 3834 * parse any other command line options, this includes 3835 * the recovery options -o and -b. The special thing 3836 * with these options is that the len needs to be 3837 * kept track of otherwise when the geometry of the 3838 * "device" is built it will create an invalid geometry 3839 */ 3840 old_optind = optind = 0; 3841 opterr = 0; 3842 oflag = 0; 3843 seq = 0; 3844 failed = 0; 3845 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3846 sp_ext_offset_t offset; 3847 sp_ext_length_t length; 3848 longlong_t tmp_size; 3849 3850 switch (c) { 3851 case 'A': /* data alignment */ 3852 if (meta_sp_parsesizestring(optarg, 3853 &alignment) == -1) { 3854 failed = 1; 3855 } 3856 break; 3857 case 'o': /* offset in the partition */ 3858 if (oflag == 1) { 3859 failed = 1; 3860 } else { 3861 tmp_size = atoll(optarg); 3862 if (tmp_size <= 0) { 3863 failed = 1; 3864 } else { 3865 oflag = 1; 3866 options |= MDCMD_DIRECT; 3867 3868 offset = tmp_size; 3869 } 3870 } 3871 3872 break; 3873 case 'b': /* number of blocks */ 3874 if (oflag == 0) { 3875 failed = 1; 3876 } else { 3877 tmp_size = atoll(optarg); 3878 if (tmp_size <= 0) { 3879 failed = 1; 3880 } else { 3881 oflag = 0; 3882 3883 length = tmp_size; 3884 3885 /* we have a pair of values */ 3886 meta_sp_list_insert(*spp, np, 3887 &extlist, offset, length, 3888 EXTTYP_ALLOC, seq++, 3889 EXTFLG_UPDATE, 3890 meta_sp_cmp_by_offset); 3891 len += length; 3892 } 3893 } 3894 3895 break; 3896 default: 3897 argc -= old_optind; 3898 argv += old_optind; 3899 goto options; 3900 } 3901 3902 if (failed) { 3903 argc -= old_optind; 3904 argv += old_optind; 3905 goto syntax; 3906 } 3907 3908 old_optind = optind; 3909 } 3910 argc -= optind; 3911 argv += optind; 3912 3913 /* 3914 * Must have matching pairs of -o and -b flags 3915 */ 3916 if (oflag != 0) 3917 goto syntax; 3918 3919 /* 3920 * Can't specify both layout (indicated indirectly by 3921 * len being set by thye -o/-b cases above) AND 3922 * alignment 3923 */ 3924 if ((len > 0LL) && (alignment > 0LL)) 3925 goto syntax; 3926 3927 /* 3928 * sanity check the allocation list 3929 */ 3930 if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3931 goto syntax; 3932 } 3933 3934 if (len == 0LL) { 3935 if (argc == 0) 3936 goto syntax; 3937 if (meta_sp_parsesize(argv[0], &len) == -1) 3938 goto syntax; 3939 --argc, ++argv; 3940 } 3941 3942 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3943 msp->ext.ext_val->len = len; 3944 msp->compnamep = spcompnp; 3945 3946 /* we should be at the end */ 3947 if (argc != 0) 3948 goto syntax; 3949 3950 /* create soft partition */ 3951 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3952 goto out; 3953 rval = 0; 3954 3955 /* let em know */ 3956 if (options & MDCMD_PRINT) { 3957 (void) printf(dgettext(TEXT_DOMAIN, 3958 "%s: Soft Partition is setup\n"), 3959 devname); 3960 (void) fflush(stdout); 3961 } 3962 goto out; 3963 3964 syntax: 3965 /* syntax error */ 3966 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3967 goto out; 3968 3969 options: 3970 /* options error */ 3971 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3972 goto out; 3973 3974 out: 3975 if (msp != NULL) { 3976 if (msp->ext.ext_val != NULL) { 3977 Free(msp->ext.ext_val); 3978 } 3979 Free(msp); 3980 } 3981 3982 return (rval); 3983 } 3984 3985 /* 3986 * FUNCTION: meta_free_sp() 3987 * INPUT: msp - the soft partition unit to free 3988 * OUTPUT: none 3989 * RETURNS: void 3990 * PURPOSE: provides an interface from the rest of libmeta for freeing a 3991 * soft partition unit 3992 */ 3993 void 3994 meta_free_sp(md_sp_t *msp) 3995 { 3996 Free(msp); 3997 } 3998 3999 /* 4000 * FUNCTION: meta_sp_issp() 4001 * INPUT: sp - the set name to check 4002 * np - the name to check 4003 * OUTPUT: ep - return error pointer 4004 * RETURNS: int - 0 means sp,np is a soft partition 4005 * 1 means sp,np is not a soft partition 4006 * PURPOSE: determines whether the given device is a soft partition 4007 * device. This is called by other metadevice check routines. 4008 */ 4009 int 4010 meta_sp_issp( 4011 mdsetname_t *sp, 4012 mdname_t *np, 4013 md_error_t *ep 4014 ) 4015 { 4016 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 4017 return (1); 4018 4019 return (0); 4020 } 4021 4022 /* 4023 * FUNCTION: meta_check_sp() 4024 * INPUT: sp - the set name to check 4025 * msp - the unit structure to check 4026 * options - creation options 4027 * OUTPUT: repart_options - options to be passed to 4028 * meta_repartition_drive() 4029 * ep - return error pointer 4030 * RETURNS: int - 0 ok to create on this component 4031 * -1 error or not ok to create on this component 4032 * PURPOSE: Checks to determine whether the rules for creation of 4033 * soft partitions allow creation of a soft partition on 4034 * the device described by the mdname_t structure referred 4035 * to by msp->compnamep. 4036 * 4037 * NOTE: Does NOT check to determine whether the extents 4038 * described in the md_sp_t structure referred to by 4039 * msp will fit on the device described by the mdname_t 4040 * structure located at msp->compnamep. 4041 */ 4042 static int 4043 meta_check_sp( 4044 mdsetname_t *sp, 4045 md_sp_t *msp, 4046 mdcmdopts_t options, 4047 int *repart_options, 4048 md_error_t *ep 4049 ) 4050 { 4051 md_common_t *mdp; 4052 mdname_t *compnp = msp->compnamep; 4053 uint_t slice; 4054 mddrivename_t *dnp; 4055 mdname_t *slicenp; 4056 mdvtoc_t *vtocp; 4057 4058 /* make sure it is in the set */ 4059 if (meta_check_inset(sp, compnp, ep) != 0) 4060 return (-1); 4061 4062 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4063 uint_t rep_slice; 4064 4065 /* 4066 * check to make sure we can partition this drive. 4067 * we cannot continue if any of the following are 4068 * true: 4069 * The drive is a metadevice. 4070 * The drive contains a mounted slice. 4071 * The drive contains a slice being swapped to. 4072 * The drive contains slices which are part of other 4073 * metadevices. 4074 * The drive contains a metadb. 4075 */ 4076 if (metaismeta(compnp)) 4077 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4078 compnp->cname)); 4079 4080 assert(compnp->drivenamep != NULL); 4081 4082 /* 4083 * ensure that we have slice 0 since the disk will be 4084 * repartitioned in the USE_WHOLE_DISK case. this check 4085 * is redundant unless the user incorrectly specifies a 4086 * a fully qualified drive AND slice name (i.e., 4087 * /dev/dsk/cXtXdXsX), which will be incorrectly 4088 * recognized as a drive name by the metaname code. 4089 */ 4090 4091 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4092 return (-1); 4093 if (slice != MD_SLICE0) 4094 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4095 4096 dnp = compnp->drivenamep; 4097 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4098 return (-1); 4099 4100 for (slice = 0; slice < vtocp->nparts; slice++) { 4101 4102 /* only check if the slice really exists */ 4103 if (vtocp->parts[slice].size == 0) 4104 continue; 4105 4106 slicenp = metaslicename(dnp, slice, ep); 4107 if (slicenp == NULL) 4108 return (-1); 4109 4110 /* check to ensure that it is not already in use */ 4111 if (meta_check_inuse(sp, 4112 slicenp, MDCHK_INUSE, ep) != 0) { 4113 return (-1); 4114 } 4115 4116 /* 4117 * Up to this point, tests are applied to all 4118 * slices uniformly. 4119 */ 4120 4121 if (slice == rep_slice) { 4122 /* 4123 * Tests inside the body of this 4124 * conditional are applied only to 4125 * slice seven. 4126 */ 4127 if (meta_check_inmeta(sp, slicenp, 4128 options | MDCHK_ALLOW_MDDB | 4129 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4130 return (-1); 4131 4132 /* 4133 * For slice seven, a metadb is NOT an 4134 * automatic failure. It merely means 4135 * that we're not allowed to muck 4136 * about with the partitioning of that 4137 * slice. We indicate this by masking 4138 * in the MD_REPART_LEAVE_REP flag. 4139 */ 4140 if (metahasmddb(sp, slicenp, ep)) { 4141 assert(repart_options != 4142 NULL); 4143 *repart_options |= 4144 MD_REPART_LEAVE_REP; 4145 } 4146 4147 /* 4148 * Skip the remaining tests for slice 4149 * seven 4150 */ 4151 continue; 4152 } 4153 4154 /* 4155 * Tests below this point will be applied to 4156 * all slices EXCEPT for the replica slice. 4157 */ 4158 4159 4160 /* check if component is in a metadevice */ 4161 if (meta_check_inmeta(sp, slicenp, options, 0, 4162 -1, ep) != 0) 4163 return (-1); 4164 4165 /* check to see if component has a metadb */ 4166 if (metahasmddb(sp, slicenp, ep)) 4167 return (mddeverror(ep, MDE_HAS_MDDB, 4168 slicenp->dev, slicenp->cname)); 4169 } 4170 /* 4171 * This should be all of the testing necessary when 4172 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4173 * meta_check_sp() is oriented towards component 4174 * arguments instead of disks. 4175 */ 4176 goto meta_check_sp_ok; 4177 4178 } 4179 4180 /* check to ensure that it is not already in use */ 4181 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4182 return (-1); 4183 } 4184 4185 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4186 4187 /* 4188 * The component can have one or more soft partitions on it 4189 * already, but can't be part of any other type of metadevice, 4190 * so if it is used for a metadevice, but the metadevice 4191 * isn't a soft partition, return failure. 4192 */ 4193 4194 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4195 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4196 return (-1); 4197 } 4198 } else { /* handle metadevices */ 4199 /* get underlying unit & check capabilities */ 4200 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4201 return (-1); 4202 4203 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4204 (! (mdp->capabilities & MD_CAN_SP))) 4205 return (mdmderror(ep, MDE_INVAL_UNIT, 4206 meta_getminor(compnp->dev), compnp->cname)); 4207 } 4208 4209 meta_check_sp_ok: 4210 mdclrerror(ep); 4211 return (0); 4212 } 4213 4214 /* 4215 * FUNCTION: meta_create_sp() 4216 * INPUT: sp - the set name to create in 4217 * msp - the unit structure to create 4218 * oblist - an optional list of requested extents (-o/-b options) 4219 * options - creation options 4220 * alignment - data alignment 4221 * OUTPUT: ep - return error pointer 4222 * RETURNS: int - 0 success, -1 error 4223 * PURPOSE: does most of the work for creating a soft partition. If 4224 * metainit -p -e was used, first partition the drive. Then 4225 * create an extent list based on the existing soft partitions 4226 * and assume all space not used by them is free. Storage for 4227 * the new soft partition is allocated from the free extents 4228 * based on the length specified on the command line or the 4229 * oblist passed in. The unit structure is then committed and 4230 * the watermarks are updated. Finally, the status is changed to 4231 * Okay and the process is complete. 4232 */ 4233 static int 4234 meta_create_sp( 4235 mdsetname_t *sp, 4236 md_sp_t *msp, 4237 sp_ext_node_t *oblist, 4238 mdcmdopts_t options, 4239 sp_ext_length_t alignment, 4240 md_error_t *ep 4241 ) 4242 { 4243 mdname_t *np = msp->common.namep; 4244 mdname_t *compnp = msp->compnamep; 4245 mp_unit_t *mp = NULL; 4246 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4247 md_set_params_t set_params; 4248 int rval = -1; 4249 diskaddr_t comp_size; 4250 diskaddr_t sp_start; 4251 sp_ext_node_t *extlist = NULL; 4252 int numexts = 0; /* number of extents */ 4253 int count = 0; 4254 int committed = 0; 4255 int repart_options = MD_REPART_FORCE; 4256 int create_flag = MD_CRO_32BIT; 4257 int mn_set_master = 0; 4258 4259 md_set_desc *sd; 4260 md_set_mmown_params_t *ownpar = NULL; 4261 int comp_is_mirror = 0; 4262 4263 /* validate soft partition */ 4264 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4265 return (-1); 4266 4267 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4268 if ((options & MDCMD_DOIT) != 0) { 4269 if (meta_repartition_drive(sp, 4270 compnp->drivenamep, 4271 repart_options, 4272 NULL, /* Don't return the VTOC */ 4273 ep) != 0) 4274 4275 return (-1); 4276 } else { 4277 /* 4278 * If -n and -e are both specified, it doesn't make 4279 * sense to continue without actually partitioning 4280 * the drive. 4281 */ 4282 return (0); 4283 } 4284 } 4285 4286 /* populate the start_blk field of the component name */ 4287 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4288 MD_DISKADDR_ERROR) { 4289 rval = -1; 4290 goto out; 4291 } 4292 4293 if (options & MDCMD_DOIT) { 4294 /* store name in namespace */ 4295 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4296 rval = -1; 4297 goto out; 4298 } 4299 } 4300 4301 /* 4302 * Get a list of the soft partitions that currently reside on 4303 * the component. We should ALWAYS force reload the cache, 4304 * because if this is a single creation, there will not BE a 4305 * cached list, and if we're using the md.tab, we must rebuild 4306 * the list because it won't contain the previous (if any) 4307 * soft partition. 4308 */ 4309 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4310 if (count < 0) { 4311 /* error occured */ 4312 rval = -1; 4313 goto out; 4314 } 4315 4316 /* 4317 * get the size of the underlying device. if the size is smaller 4318 * than or equal to the watermark size, we know there isn't 4319 * enough space. 4320 */ 4321 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4322 rval = -1; 4323 goto out; 4324 } else if (comp_size <= MD_SP_WMSIZE) { 4325 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4326 rval = -1; 4327 goto out; 4328 } 4329 /* 4330 * seed extlist with reserved space at the beginning of the volume and 4331 * enough space for the end watermark. The end watermark always gets 4332 * updated, but if the underlying device changes size it may not be 4333 * pointed to until the extent before it is updated. Since the 4334 * end of the reserved space is where the first watermark starts, 4335 * the reserved extent should never be marked for updating. 4336 */ 4337 4338 meta_sp_list_insert(NULL, NULL, &extlist, 4339 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4340 meta_sp_list_insert(NULL, NULL, &extlist, 4341 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4342 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4343 4344 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4345 rval = -1; 4346 goto out; 4347 } 4348 4349 metafreenamelist(spnlp); 4350 4351 if (getenv(META_SP_DEBUG)) { 4352 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4353 meta_sp_list_dump(extlist); 4354 } 4355 4356 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4357 4358 /* get extent list from -o/-b options or from free space */ 4359 if (options & MDCMD_DIRECT) { 4360 if (getenv(META_SP_DEBUG)) { 4361 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4362 meta_sp_list_dump(oblist); 4363 } 4364 4365 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4366 if (numexts == -1) { 4367 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4368 rval = -1; 4369 goto out; 4370 } 4371 } else { 4372 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4373 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4374 meta_sp_get_default_alignment(sp, compnp, ep)); 4375 if (numexts == -1) { 4376 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4377 rval = -1; 4378 goto out; 4379 } 4380 } 4381 4382 assert(extlist != NULL); 4383 4384 /* create soft partition */ 4385 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4386 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4387 4388 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4389 4390 /* if we're not doing anything (metainit -n), return success */ 4391 if (! (options & MDCMD_DOIT)) { 4392 rval = 0; /* success */ 4393 goto out; 4394 } 4395 4396 (void) memset(&set_params, 0, sizeof (set_params)); 4397 4398 if (create_flag == MD_CRO_64BIT) { 4399 mp->c.un_revision |= MD_64BIT_META_DEV; 4400 set_params.options = MD_CRO_64BIT; 4401 } else { 4402 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4403 set_params.options = MD_CRO_32BIT; 4404 } 4405 4406 if (getenv(META_SP_DEBUG)) { 4407 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4408 meta_sp_printunit(mp); 4409 } 4410 4411 /* 4412 * Check to see if we're trying to create a partition on a mirror. If so 4413 * we may have to enforce an ownership change before writing the 4414 * watermark out. 4415 */ 4416 if (metaismeta(compnp)) { 4417 char *miscname; 4418 4419 miscname = metagetmiscname(compnp, ep); 4420 if (miscname != NULL) 4421 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4422 else 4423 comp_is_mirror = 0; 4424 } else { 4425 comp_is_mirror = 0; 4426 } 4427 4428 /* 4429 * For a multi-node environment we have to ensure that the master 4430 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4431 * If the master does not own the device we will deadlock as the 4432 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4433 * ownership change that will block as the MD_IOCSET is still in 4434 * progress. To close this window we force an owner change to occur 4435 * before issuing the MD_IOCSET. We cannot simply open the device and 4436 * write to it as this will only work for the first soft-partition 4437 * creation. 4438 */ 4439 4440 if (comp_is_mirror && !metaislocalset(sp)) { 4441 4442 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4443 rval = -1; 4444 goto out; 4445 } 4446 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4447 mn_set_master = 1; 4448 } 4449 } 4450 4451 set_params.mnum = MD_SID(mp); 4452 set_params.size = mp->c.un_size; 4453 set_params.mdp = (uintptr_t)mp; 4454 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4455 4456 /* first phase of commit. */ 4457 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4458 np->cname) != 0) { 4459 (void) mdstealerror(ep, &set_params.mde); 4460 rval = -1; 4461 goto out; 4462 } 4463 4464 /* we've successfully committed the record */ 4465 committed = 1; 4466 4467 /* write watermarks */ 4468 /* 4469 * Special-case for Multi-node sets. As we now have a distributed DRL 4470 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case 4471 * unless we use a 'special' MN-capable ioctl to stage the watermark 4472 * update. This only affects the master-node in an MN set. 4473 */ 4474 if (mn_set_master) { 4475 if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) { 4476 rval = -1; 4477 goto out; 4478 } 4479 } else { 4480 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4481 rval = -1; 4482 goto out; 4483 } 4484 } 4485 4486 /* second phase of commit, set status to MD_SP_OK */ 4487 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4488 rval = -1; 4489 goto out; 4490 } 4491 rval = 0; 4492 out: 4493 Free(mp); 4494 if (ownpar) 4495 Free(ownpar); 4496 4497 if (extlist != NULL) 4498 meta_sp_list_free(&extlist); 4499 4500 if (rval != 0 && keynlp != NULL && committed != 1) 4501 (void) del_key_names(sp, keynlp, NULL); 4502 4503 metafreenamelist(keynlp); 4504 4505 return (rval); 4506 } 4507 4508 /* 4509 * ************************************************************************** 4510 * Reset (metaclear) Functions * 4511 * ************************************************************************** 4512 */ 4513 4514 /* 4515 * FUNCTION: meta_sp_reset_common() 4516 * INPUT: sp - the set name of the device to reset 4517 * np - the name of the device to reset 4518 * msp - the unit structure to reset 4519 * options - metaclear options 4520 * OUTPUT: ep - return error pointer 4521 * RETURNS: int - 0 success, -1 error 4522 * PURPOSE: "resets", or more accurately deletes, the soft partition 4523 * specified. First the state is set to "deleting" and then the 4524 * watermarks are all cleared out. Once the watermarks have been 4525 * updated, the unit structure is deleted from the metadb. 4526 */ 4527 static int 4528 meta_sp_reset_common( 4529 mdsetname_t *sp, 4530 mdname_t *np, 4531 md_sp_t *msp, 4532 md_sp_reset_t reset_params, 4533 mdcmdopts_t options, 4534 md_error_t *ep 4535 ) 4536 { 4537 char *miscname; 4538 int rval = -1; 4539 int is_open = 0; 4540 4541 /* make sure that nobody owns us */ 4542 if (MD_HAS_PARENT(msp->common.parent)) 4543 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4544 np->cname)); 4545 4546 /* make sure that the soft partition isn't open */ 4547 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4548 return (-1); 4549 else if (is_open) 4550 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4551 np->cname)); 4552 4553 /* get miscname */ 4554 if ((miscname = metagetmiscname(np, ep)) == NULL) 4555 return (-1); 4556 4557 /* fill in reset params */ 4558 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4559 reset_params.mnum = meta_getminor(np->dev); 4560 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4561 4562 /* 4563 * clear soft partition - phase one. 4564 * place the soft partition into the "delete pending" state. 4565 */ 4566 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4567 return (-1); 4568 4569 /* 4570 * Now clear the watermarks. If the force flag is specified, 4571 * ignore any errors writing the watermarks and delete the unit 4572 * structure anyway. An error may leave the on-disk format in a 4573 * corrupt state. If force is not specified and we fail here, 4574 * the soft partition will remain in the "delete pending" state. 4575 */ 4576 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4577 ((options & MDCMD_FORCE) == 0)) 4578 goto out; 4579 4580 /* 4581 * clear soft partition - phase two. 4582 * the driver removes the soft partition from the metadb and 4583 * zeros out incore version. 4584 */ 4585 if (metaioctl(MD_IOCRESET, &reset_params, 4586 &reset_params.mde, np->cname) != 0) { 4587 (void) mdstealerror(ep, &reset_params.mde); 4588 goto out; 4589 } 4590 4591 /* 4592 * Wait for the /dev to be cleaned up. Ignore the return 4593 * value since there's not much we can do. 4594 */ 4595 (void) meta_update_devtree(meta_getminor(np->dev)); 4596 4597 rval = 0; /* success */ 4598 4599 if (options & MDCMD_PRINT) { 4600 (void) printf(dgettext(TEXT_DOMAIN, 4601 "%s: Soft Partition is cleared\n"), 4602 np->cname); 4603 (void) fflush(stdout); 4604 } 4605 4606 /* 4607 * if told to recurse and on a metadevice, then attempt to 4608 * clear the subdevices. Indicate failure if the clear fails. 4609 */ 4610 if ((options & MDCMD_RECURSE) && 4611 (metaismeta(msp->compnamep)) && 4612 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4613 rval = -1; 4614 4615 out: 4616 meta_invalidate_name(np); 4617 return (rval); 4618 } 4619 4620 /* 4621 * FUNCTION: meta_sp_reset() 4622 * INPUT: sp - the set name of the device to reset 4623 * np - the name of the device to reset 4624 * options - metaclear options 4625 * OUTPUT: ep - return error pointer 4626 * RETURNS: int - 0 success, -1 error 4627 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4628 * soft partition. If np is NULL, then soft partitions are 4629 * all deleted at the current level and then recursively deleted. 4630 * Otherwise, if a name is specified either directly or as a 4631 * result of a recursive operation, it deletes only that name. 4632 * Since something sitting under a soft partition may be parented 4633 * to it, we have to reparent that other device to another soft 4634 * partition on the same component if we're deleting the one it's 4635 * parented to. 4636 */ 4637 int 4638 meta_sp_reset( 4639 mdsetname_t *sp, 4640 mdname_t *np, 4641 mdcmdopts_t options, 4642 md_error_t *ep 4643 ) 4644 { 4645 md_sp_t *msp; 4646 int rval = -1; 4647 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4648 md_sp_reset_t reset_params; 4649 int num_sp; 4650 4651 assert(sp != NULL); 4652 4653 /* reset/delete all soft paritions */ 4654 if (np == NULL) { 4655 /* 4656 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4657 * is incorrect for soft partitions. We want to clear 4658 * all soft partitions at a particular level in the 4659 * metadevice stack before moving to the next level. 4660 * Thus, we clear MDCMD_RECURSE from the options. 4661 */ 4662 options &= ~MDCMD_RECURSE; 4663 4664 /* for each soft partition */ 4665 rval = 0; 4666 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4667 rval = -1; 4668 4669 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4670 np = nlp->namep; 4671 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4672 rval = -1; 4673 break; 4674 } 4675 /* 4676 * meta_reset_all calls us twice to get soft 4677 * partitions at the top and bottom of the stack. 4678 * thus, if we have a parent, we'll get deleted 4679 * on the next call. 4680 */ 4681 if (MD_HAS_PARENT(msp->common.parent)) 4682 continue; 4683 /* 4684 * If this is a multi-node set, we send a series 4685 * of individual metaclear commands. 4686 */ 4687 if (meta_is_mn_set(sp, ep)) { 4688 if (meta_mn_send_metaclear_command(sp, 4689 np->cname, options, 0, ep) != 0) { 4690 rval = -1; 4691 break; 4692 } 4693 } else { 4694 if (meta_sp_reset(sp, np, options, ep) != 0) { 4695 rval = -1; 4696 break; 4697 } 4698 } 4699 } 4700 /* cleanup return status */ 4701 metafreenamelist(spnlp); 4702 return (rval); 4703 } 4704 4705 /* check the name */ 4706 if (metachkmeta(np, ep) != 0) 4707 return (-1); 4708 4709 /* get the unit structure */ 4710 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4711 return (-1); 4712 4713 /* clear out reset parameters */ 4714 (void) memset(&reset_params, 0, sizeof (reset_params)); 4715 4716 /* if our child is a metadevice, we need to deparent/reparent it */ 4717 if (metaismeta(msp->compnamep)) { 4718 /* get sp's on this component */ 4719 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4720 &spnlp, 1, ep)) <= 0) 4721 /* no sp's on this device. error! */ 4722 return (-1); 4723 else if (num_sp == 1) 4724 /* last sp on this device, so we deparent */ 4725 reset_params.new_parent = MD_NO_PARENT; 4726 else { 4727 /* have to reparent this metadevice */ 4728 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4729 if (meta_getminor(nlp->namep->dev) == 4730 meta_getminor(np->dev)) 4731 continue; 4732 /* 4733 * this isn't the softpart we are deleting, 4734 * so use this device as the new parent. 4735 */ 4736 reset_params.new_parent = 4737 meta_getminor(nlp->namep->dev); 4738 break; 4739 } 4740 } 4741 metafreenamelist(spnlp); 4742 } 4743 4744 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4745 return (-1); 4746 4747 return (0); 4748 } 4749 4750 /* 4751 * FUNCTION: meta_sp_reset_component() 4752 * INPUT: sp - the set name of the device to reset 4753 * name - the string name of the device to reset 4754 * options - metaclear options 4755 * OUTPUT: ep - return error pointer 4756 * RETURNS: int - 0 success, -1 error 4757 * PURPOSE: provides the ability to delete all soft partitions on a 4758 * specified device (metaclear -p). It first gets all of the 4759 * soft partitions on the component and then deletes each one 4760 * individually. 4761 */ 4762 int 4763 meta_sp_reset_component( 4764 mdsetname_t *sp, 4765 char *name, 4766 mdcmdopts_t options, 4767 md_error_t *ep 4768 ) 4769 { 4770 mdname_t *compnp, *np; 4771 mdnamelist_t *spnlp = NULL; 4772 mdnamelist_t *nlp = NULL; 4773 md_sp_t *msp; 4774 int count; 4775 md_sp_reset_t reset_params; 4776 4777 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4778 return (-1); 4779 4780 /* If we're starting out with no soft partitions, it's an error */ 4781 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4782 if (count == 0) 4783 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4784 else if (count < 0) 4785 return (-1); 4786 4787 /* 4788 * clear all soft partitions on this component. 4789 * NOTE: we reparent underlying metadevices as we go so that 4790 * things stay sane. Also, if we encounter an error, we stop 4791 * and go no further in case recovery might be needed. 4792 */ 4793 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4794 /* clear out reset parameters */ 4795 (void) memset(&reset_params, 0, sizeof (reset_params)); 4796 4797 /* check the name */ 4798 np = nlp->namep; 4799 4800 if (metachkmeta(np, ep) != 0) { 4801 metafreenamelist(spnlp); 4802 return (-1); 4803 } 4804 4805 /* get the unit structure */ 4806 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4807 metafreenamelist(spnlp); 4808 return (-1); 4809 } 4810 4811 /* have to deparent/reparent metadevices */ 4812 if (metaismeta(compnp)) { 4813 if (nlp->next == NULL) 4814 reset_params.new_parent = MD_NO_PARENT; 4815 else 4816 reset_params.new_parent = 4817 meta_getminor(spnlp->next->namep->dev); 4818 } 4819 4820 /* clear soft partition */ 4821 if (meta_sp_reset_common(sp, np, msp, reset_params, 4822 options, ep) < 0) { 4823 metafreenamelist(spnlp); 4824 return (-1); 4825 } 4826 } 4827 metafreenamelist(spnlp); 4828 return (0); 4829 } 4830 4831 /* 4832 * ************************************************************************** 4833 * Grow (metattach) Functions * 4834 * ************************************************************************** 4835 */ 4836 4837 /* 4838 * FUNCTION: meta_sp_attach() 4839 * INPUT: sp - the set name of the device to attach to 4840 * np - the name of the device to attach to 4841 * addsize - the unparsed string holding the amount of space to add 4842 * options - metattach options 4843 * alignment - data alignment 4844 * OUTPUT: ep - return error pointer 4845 * RETURNS: int - 0 success, -1 error 4846 * PURPOSE: grows a soft partition by reading in the existing unit 4847 * structure and setting its state to Growing, allocating more 4848 * space (similar to meta_create_sp()), updating the watermarks, 4849 * and then writing out the new unit structure in the Okay state. 4850 */ 4851 int 4852 meta_sp_attach( 4853 mdsetname_t *sp, 4854 mdname_t *np, 4855 char *addsize, 4856 mdcmdopts_t options, 4857 sp_ext_length_t alignment, 4858 md_error_t *ep 4859 ) 4860 { 4861 md_grow_params_t grow_params; 4862 sp_ext_length_t grow_len; /* amount to grow */ 4863 mp_unit_t *mp, *new_un; 4864 mdname_t *compnp = NULL; 4865 4866 sp_ext_node_t *extlist = NULL; 4867 int numexts; 4868 mdnamelist_t *spnlp = NULL; 4869 int count; 4870 md_sp_t *msp; 4871 daddr_t start_block; 4872 4873 /* should have the same set */ 4874 assert(sp != NULL); 4875 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4876 4877 /* check name */ 4878 if (metachkmeta(np, ep) != 0) 4879 return (-1); 4880 4881 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4882 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4883 } 4884 4885 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4886 return (-1); 4887 4888 /* make sure we don't have a parent */ 4889 if (MD_HAS_PARENT(mp->c.un_parent)) { 4890 Free(mp); 4891 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4892 } 4893 4894 if (getenv(META_SP_DEBUG)) { 4895 meta_sp_debug("meta_sp_attach: Unit structure before new " 4896 "space:\n"); 4897 meta_sp_printunit(mp); 4898 } 4899 4900 /* 4901 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4902 * If this was not the case we would suffer the following 4903 * assertion failure: 4904 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4905 * file meta_check.x, line 315 4906 * I guess this is because we have not "seen" this drive before 4907 * and hence hit the failure - this is of course the attach routine 4908 */ 4909 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4910 Free(mp); 4911 return (-1); 4912 } 4913 4914 /* metakeyname does not fill in the key. */ 4915 compnp->key = mp->un_key; 4916 4917 /* work out the space on the component that we are dealing with */ 4918 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4919 4920 /* 4921 * see if the component has been soft partitioned yet, or if an 4922 * error occurred. 4923 */ 4924 if (count == 0) { 4925 Free(mp); 4926 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4927 } else if (count < 0) { 4928 Free(mp); 4929 return (-1); 4930 } 4931 4932 /* 4933 * seed extlist with reserved space at the beginning of the volume and 4934 * enough space for the end watermark. The end watermark always gets 4935 * updated, but if the underlying device changes size it may not be 4936 * pointed to until the extent before it is updated. Since the 4937 * end of the reserved space is where the first watermark starts, 4938 * the reserved extent should never be marked for updating. 4939 */ 4940 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4941 MD_DISKADDR_ERROR) { 4942 Free(mp); 4943 return (-1); 4944 } 4945 4946 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4947 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4948 meta_sp_list_insert(NULL, NULL, &extlist, 4949 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4950 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4951 4952 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4953 Free(mp); 4954 return (-1); 4955 } 4956 4957 metafreenamelist(spnlp); 4958 4959 if (getenv(META_SP_DEBUG)) { 4960 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4961 meta_sp_list_dump(extlist); 4962 } 4963 4964 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4965 4966 assert(mp->un_numexts >= 1); 4967 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4968 mp->un_ext[mp->un_numexts - 1].un_poff, 4969 (alignment > 0) ? alignment : 4970 meta_sp_get_default_alignment(sp, compnp, ep)); 4971 4972 if (numexts == -1) { 4973 Free(mp); 4974 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4975 } 4976 4977 /* allocate new unit structure and copy in old unit */ 4978 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4979 grow_len, numexts, ep)) == NULL) { 4980 Free(mp); 4981 return (-1); 4982 } 4983 Free(mp); 4984 4985 /* If running in dryrun mode (-n option), we're done here */ 4986 if ((options & MDCMD_DOIT) == 0) { 4987 if (options & MDCMD_PRINT) { 4988 (void) printf(dgettext(TEXT_DOMAIN, 4989 "%s: Soft Partition would grow\n"), 4990 np->cname); 4991 (void) fflush(stdout); 4992 } 4993 return (0); 4994 } 4995 4996 if (getenv(META_SP_DEBUG)) { 4997 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 4998 meta_sp_printunit(new_un); 4999 } 5000 5001 assert(new_un != NULL); 5002 5003 (void) memset(&grow_params, 0, sizeof (grow_params)); 5004 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 5005 grow_params.options = MD_CRO_64BIT; 5006 new_un->c.un_revision |= MD_64BIT_META_DEV; 5007 } else { 5008 grow_params.options = MD_CRO_32BIT; 5009 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 5010 } 5011 grow_params.mnum = MD_SID(new_un); 5012 grow_params.size = new_un->c.un_size; 5013 grow_params.mdp = (uintptr_t)new_un; 5014 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5015 5016 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5017 np->cname) != 0) { 5018 (void) mdstealerror(ep, &grow_params.mde); 5019 return (-1); 5020 } 5021 5022 /* update all watermarks */ 5023 5024 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5025 return (-1); 5026 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5027 return (-1); 5028 5029 5030 /* second phase of commit, set status to MD_SP_OK */ 5031 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5032 return (-1); 5033 5034 meta_invalidate_name(np); 5035 5036 if (options & MDCMD_PRINT) { 5037 (void) printf(dgettext(TEXT_DOMAIN, 5038 "%s: Soft Partition has been grown\n"), 5039 np->cname); 5040 (void) fflush(stdout); 5041 } 5042 5043 return (0); 5044 } 5045 5046 /* 5047 * ************************************************************************** 5048 * Recovery (metarecover) Functions * 5049 * ************************************************************************** 5050 */ 5051 5052 /* 5053 * FUNCTION: meta_recover_sp() 5054 * INPUT: sp - the name of the set we are recovering on 5055 * compnp - name pointer for device we are recovering on 5056 * argc - argument count 5057 * argv - left over arguments not parsed by metarecover command 5058 * options - metarecover options 5059 * OUTPUT: ep - return error pointer 5060 * RETURNS: int - 0 - success, -1 - error 5061 * PURPOSE: parse soft partitioning-specific metarecover options and 5062 * dispatch to the appropriate function to handle recovery. 5063 */ 5064 int 5065 meta_recover_sp( 5066 mdsetname_t *sp, 5067 mdname_t *compnp, 5068 int argc, 5069 char *argv[], 5070 mdcmdopts_t options, 5071 md_error_t *ep 5072 ) 5073 { 5074 md_set_desc *sd; 5075 5076 if (argc > 1) { 5077 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5078 argc, argv); 5079 return (-1); 5080 } 5081 5082 /* 5083 * For a MN set, this operation must be performed on the master 5084 * as it is responsible for maintaining the watermarks 5085 */ 5086 if (!metaislocalset(sp)) { 5087 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5088 return (-1); 5089 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5090 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5091 sd->sd_mn_master_nodenm, NULL, NULL); 5092 return (-1); 5093 } 5094 } 5095 if (argc == 0) { 5096 /* 5097 * if no additional arguments are passed, metarecover should 5098 * validate both on-disk and metadb structures as well as 5099 * checking that both are consistent with each other 5100 */ 5101 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5102 return (-1); 5103 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5104 return (-1); 5105 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5106 return (-1); 5107 } else if (strcmp(argv[0], "-d") == 0) { 5108 /* 5109 * Ensure that there is no existing valid record for this 5110 * soft-partition. If there is we have nothing to do. 5111 */ 5112 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5113 return (-1); 5114 /* validate and recover from on-disk structures */ 5115 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5116 return (-1); 5117 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5118 return (-1); 5119 } else if (strcmp(argv[0], "-m") == 0) { 5120 /* validate and recover from metadb structures */ 5121 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5122 return (-1); 5123 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5124 return (-1); 5125 } else { 5126 /* syntax error */ 5127 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5128 argc, argv); 5129 return (-1); 5130 } 5131 5132 return (0); 5133 } 5134 5135 /* 5136 * FUNCTION: meta_sp_display_exthdr() 5137 * INPUT: none 5138 * OUTPUT: none 5139 * RETURNS: void 5140 * PURPOSE: print header line for sp_ext_node_t information. to be used 5141 * in conjunction with meta_sp_display_ext(). 5142 */ 5143 static void 5144 meta_sp_display_exthdr(void) 5145 { 5146 (void) printf("%20s %5s %7s %20s %20s\n", 5147 dgettext(TEXT_DOMAIN, "Name"), 5148 dgettext(TEXT_DOMAIN, "Seq#"), 5149 dgettext(TEXT_DOMAIN, "Type"), 5150 dgettext(TEXT_DOMAIN, "Offset"), 5151 dgettext(TEXT_DOMAIN, "Length")); 5152 } 5153 5154 5155 /* 5156 * FUNCTION: meta_sp_display_ext() 5157 * INPUT: ext - extent to display 5158 * OUTPUT: none 5159 * RETURNS: void 5160 * PURPOSE: print selected fields from sp_ext_node_t. 5161 */ 5162 static void 5163 meta_sp_display_ext(sp_ext_node_t *ext) 5164 { 5165 /* print extent information */ 5166 if (ext->ext_namep != NULL) 5167 (void) printf("%20s ", ext->ext_namep->cname); 5168 else 5169 (void) printf("%20s ", "NONE"); 5170 5171 (void) printf("%5u ", ext->ext_seq); 5172 5173 switch (ext->ext_type) { 5174 case EXTTYP_ALLOC: 5175 (void) printf("%7s ", "ALLOC"); 5176 break; 5177 case EXTTYP_FREE: 5178 (void) printf("%7s ", "FREE"); 5179 break; 5180 case EXTTYP_RESERVED: 5181 (void) printf("%7s ", "RESV"); 5182 break; 5183 case EXTTYP_END: 5184 (void) printf("%7s ", "END"); 5185 break; 5186 default: 5187 (void) printf("%7s ", "INVLD"); 5188 break; 5189 } 5190 5191 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5192 } 5193 5194 5195 /* 5196 * FUNCTION: meta_sp_checkseq() 5197 * INPUT: extlist - list of extents to be checked 5198 * OUTPUT: none 5199 * RETURNS: int - 0 - success, -1 - error 5200 * PURPOSE: check soft partition sequence numbers. this function assumes 5201 * that a list of extents representing 1 or more soft partitions 5202 * is passed in sorted in sequence number order. within a 5203 * single soft partition, there may not be any missing or 5204 * duplicate sequence numbers. 5205 */ 5206 static int 5207 meta_sp_checkseq(sp_ext_node_t *extlist) 5208 { 5209 sp_ext_node_t *ext; 5210 5211 assert(extlist != NULL); 5212 5213 for (ext = extlist; 5214 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5215 ext = ext->ext_next) { 5216 if (ext->ext_next->ext_namep != NULL && 5217 strcmp(ext->ext_next->ext_namep->cname, 5218 ext->ext_namep->cname) != 0) 5219 continue; 5220 5221 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5222 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5223 "%s: sequence numbers are " 5224 "incorrect: %d should be %d\n"), 5225 ext->ext_next->ext_namep->cname, 5226 ext->ext_next->ext_seq, ext->ext_seq + 1); 5227 return (-1); 5228 } 5229 } 5230 return (0); 5231 } 5232 5233 5234 /* 5235 * FUNCTION: meta_sp_resolve_name_conflict() 5236 * INPUT: sp - name of set we're are recovering in. 5237 * old_np - name pointer of soft partition we found on disk. 5238 * OUTPUT: new_np - name pointer for new soft partition name. 5239 * ep - error pointer returned. 5240 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5241 * PURPOSE: Check to see if the name of one of the soft partitions we found 5242 * on disk already exists in the metadb. If so, prompt for a new 5243 * name. In addition, we keep a static array of names that 5244 * will be recovered from this device since these names don't 5245 * exist in the configuration at this point but cannot be 5246 * recovered more than once. 5247 */ 5248 static int 5249 meta_sp_resolve_name_conflict( 5250 mdsetname_t *sp, 5251 mdname_t *old_np, 5252 mdname_t **new_np, 5253 md_error_t *ep 5254 ) 5255 { 5256 char yesno[255]; 5257 char *yes; 5258 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5259 int nunits; 5260 static int *used_names = NULL; 5261 5262 assert(old_np != NULL); 5263 5264 if (used_names == NULL) { 5265 if ((nunits = meta_get_nunits(ep)) < 0) 5266 return (-1); 5267 used_names = Zalloc(nunits * sizeof (int)); 5268 } 5269 5270 /* see if it exists already */ 5271 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5272 metagetmiscname(old_np, ep) == NULL) { 5273 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5274 return (-1); 5275 else { 5276 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5277 mdclrerror(ep); 5278 return (0); 5279 } 5280 } 5281 5282 /* name exists, ask the user for a new one */ 5283 (void) printf(dgettext(TEXT_DOMAIN, 5284 "WARNING: A soft partition named %s was found in the extent\n" 5285 "headers, but this name already exists in the metadb " 5286 "configuration.\n" 5287 "In order to continue recovery you must supply\n" 5288 "a new name for this soft partition.\n"), old_np->cname); 5289 (void) printf(dgettext(TEXT_DOMAIN, 5290 "Would you like to continue and supply a new name? (yes/no) ")); 5291 5292 (void) fflush(stdout); 5293 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5294 (strlen(yesno) == 1)) 5295 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5296 dgettext(TEXT_DOMAIN, "no")); 5297 yes = dgettext(TEXT_DOMAIN, "yes"); 5298 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5299 return (-1); 5300 } 5301 5302 (void) fflush(stdin); 5303 5304 /* get the new name */ 5305 for (;;) { 5306 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5307 "for this soft partition (dXXXX) ")); 5308 (void) fflush(stdout); 5309 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5310 (void) strcpy(newname, ""); 5311 5312 /* remove newline character */ 5313 if (newname[strlen(newname) - 1] == '\n') 5314 newname[strlen(newname) - 1] = '\0'; 5315 5316 if (!(is_metaname(newname)) || 5317 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5318 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5319 "Invalid metadevice name\n")); 5320 (void) fflush(stderr); 5321 continue; 5322 } 5323 5324 if ((*new_np = metaname(&sp, newname, 5325 META_DEVICE, ep)) == NULL) { 5326 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5327 "Invalid metadevice name\n")); 5328 (void) fflush(stderr); 5329 continue; 5330 } 5331 5332 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5333 /* make sure the name isn't already being used */ 5334 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5335 metagetmiscname(*new_np, ep) != NULL) { 5336 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5337 "That name already exists\n")); 5338 continue; 5339 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5340 return (-1); 5341 5342 break; 5343 } 5344 5345 /* got a new name, place in used array and return */ 5346 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5347 mdclrerror(ep); 5348 return (1); 5349 } 5350 5351 /* 5352 * FUNCTION: meta_sp_validate_wm() 5353 * INPUT: sp - set name we are recovering in 5354 * compnp - name pointer for device we are recovering from 5355 * options - metarecover options 5356 * OUTPUT: ep - error pointer returned 5357 * RETURNS: int - 0 - success, -1 - error 5358 * PURPOSE: validate and display watermark configuration. walk the 5359 * on-disk watermark structures and validate the information 5360 * found within. since a watermark configuration is 5361 * "self-defining", the act of traversing the watermarks 5362 * is part of the validation process. 5363 */ 5364 static int 5365 meta_sp_validate_wm( 5366 mdsetname_t *sp, 5367 mdname_t *compnp, 5368 mdcmdopts_t options, 5369 md_error_t *ep 5370 ) 5371 { 5372 sp_ext_node_t *extlist = NULL; 5373 sp_ext_node_t *ext; 5374 int num_sps = 0; 5375 int rval; 5376 5377 if ((options & MDCMD_VERBOSE) != 0) 5378 (void) printf(dgettext(TEXT_DOMAIN, 5379 "Verifying on-disk structures on %s.\n"), 5380 compnp->cname); 5381 5382 /* 5383 * for each watermark, build an ext_node, place on list. 5384 */ 5385 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5386 meta_sp_cmp_by_nameseq, ep); 5387 5388 if ((options & MDCMD_VERBOSE) != 0) { 5389 /* print out what we found */ 5390 if (extlist == NULL) 5391 (void) printf(dgettext(TEXT_DOMAIN, 5392 "No extent headers found on %s.\n"), 5393 compnp->cname); 5394 else { 5395 (void) printf(dgettext(TEXT_DOMAIN, 5396 "The following extent headers were found on %s.\n"), 5397 compnp->cname); 5398 meta_sp_display_exthdr(); 5399 } 5400 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5401 meta_sp_display_ext(ext); 5402 } 5403 5404 if (rval < 0) { 5405 (void) printf(dgettext(TEXT_DOMAIN, 5406 "%s: On-disk structures invalid or " 5407 "no soft partitions found.\n"), 5408 compnp->cname); 5409 return (-1); 5410 } 5411 5412 assert(extlist != NULL); 5413 5414 /* count number of soft partitions */ 5415 for (ext = extlist; 5416 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5417 ext = ext->ext_next) { 5418 if (ext->ext_next != NULL && 5419 ext->ext_next->ext_namep != NULL && 5420 strcmp(ext->ext_next->ext_namep->cname, 5421 ext->ext_namep->cname) == 0) 5422 continue; 5423 num_sps++; 5424 } 5425 5426 if ((options & MDCMD_VERBOSE) != 0) 5427 (void) printf(dgettext(TEXT_DOMAIN, 5428 "Found %d soft partition(s) on %s.\n"), num_sps, 5429 compnp->cname); 5430 5431 if (num_sps == 0) { 5432 (void) printf(dgettext(TEXT_DOMAIN, 5433 "%s: No soft partitions.\n"), compnp->cname); 5434 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5435 } 5436 5437 /* check sequence numbers */ 5438 if ((options & MDCMD_VERBOSE) != 0) 5439 (void) printf(dgettext(TEXT_DOMAIN, 5440 "Checking sequence numbers.\n")); 5441 5442 if (meta_sp_checkseq(extlist) != 0) 5443 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5444 5445 return (0); 5446 } 5447 5448 /* 5449 * FUNCTION: meta_sp_validate_unit() 5450 * INPUT: sp - name of set we are recovering in 5451 * compnp - name of component we are recovering from 5452 * options - metarecover options 5453 * OUTPUT: ep - error pointer returned 5454 * RETURNS: int - 0 - success, -1 - error 5455 * PURPOSE: validate and display metadb configuration. begin by getting 5456 * all soft partitions built on the specified component. get 5457 * the unit structure for each one and validate the fields within. 5458 */ 5459 static int 5460 meta_sp_validate_unit( 5461 mdsetname_t *sp, 5462 mdname_t *compnp, 5463 mdcmdopts_t options, 5464 md_error_t *ep 5465 ) 5466 { 5467 md_sp_t *msp; 5468 mdnamelist_t *spnlp = NULL; 5469 mdnamelist_t *namep = NULL; 5470 int count; 5471 uint_t extn; 5472 sp_ext_length_t size; 5473 5474 if ((options & MDCMD_VERBOSE) != 0) 5475 (void) printf(dgettext(TEXT_DOMAIN, 5476 "%s: Validating soft partition metadb entries.\n"), 5477 compnp->cname); 5478 5479 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5480 return (-1); 5481 5482 /* get all soft partitions on component */ 5483 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5484 5485 if (count == 0) { 5486 (void) printf(dgettext(TEXT_DOMAIN, 5487 "%s: No soft partitions.\n"), compnp->cname); 5488 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5489 } else if (count < 0) { 5490 return (-1); 5491 } 5492 5493 /* Now go through the soft partitions and check each one */ 5494 for (namep = spnlp; namep != NULL; namep = namep->next) { 5495 mdname_t *curnp = namep->namep; 5496 sp_ext_offset_t curvoff; 5497 5498 /* get the unit structure */ 5499 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5500 return (-1); 5501 5502 /* verify generic unit structure parameters */ 5503 if ((options & MDCMD_VERBOSE) != 0) 5504 (void) printf(dgettext(TEXT_DOMAIN, 5505 "\nVerifying device %s.\n"), 5506 curnp->cname); 5507 5508 /* 5509 * MD_SP_LAST is an invalid state and is always the 5510 * highest numbered. 5511 */ 5512 if (msp->status >= MD_SP_LAST) { 5513 (void) printf(dgettext(TEXT_DOMAIN, 5514 "%s: status value %u is out of range.\n"), 5515 curnp->cname, msp->status); 5516 return (mdmderror(ep, MDE_RECOVER_FAILED, 5517 0, curnp->cname)); 5518 } else if ((options & MDCMD_VERBOSE) != 0) { 5519 uint_t tstate = 0; 5520 5521 if (metaismeta(msp->compnamep)) { 5522 if (meta_get_tstate(msp->common.namep->dev, 5523 &tstate, ep) != 0) 5524 return (-1); 5525 } 5526 (void) printf(dgettext(TEXT_DOMAIN, 5527 "%s: Status \"%s\" is valid.\n"), 5528 curnp->cname, meta_sp_status_to_name(msp->status, 5529 tstate & MD_DEV_ERRORED)); 5530 } 5531 5532 /* Now verify each extent */ 5533 if ((options & MDCMD_VERBOSE) != 0) 5534 (void) printf("%14s %21s %21s %21s\n", 5535 dgettext(TEXT_DOMAIN, "Extent Number"), 5536 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5537 dgettext(TEXT_DOMAIN, "Physical Offset"), 5538 dgettext(TEXT_DOMAIN, "Length")); 5539 5540 curvoff = 0ULL; 5541 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5542 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5543 5544 if ((options & MDCMD_VERBOSE) != 0) 5545 (void) printf("%14u %21llu %21llu %21llu\n", 5546 extn, extp->voff, extp->poff, extp->len); 5547 5548 if (extp->voff != curvoff) { 5549 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5550 "%s: virtual offset for extent %u " 5551 "is inconsistent, expected %llu, " 5552 "got %llu.\n"), curnp->cname, extn, 5553 curvoff, extp->voff); 5554 return (mdmderror(ep, MDE_RECOVER_FAILED, 5555 0, compnp->cname)); 5556 } 5557 5558 /* make sure extent does not drop off the end */ 5559 if ((extp->poff + extp->len) == size) { 5560 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5561 "%s: extent %u at offset %llu, " 5562 "length %llu exceeds the size of the " 5563 "device, %llu.\n"), curnp->cname, 5564 extn, extp->poff, extp->len, size); 5565 return (mdmderror(ep, MDE_RECOVER_FAILED, 5566 0, compnp->cname)); 5567 } 5568 5569 curvoff += extp->len; 5570 } 5571 } 5572 if (options & MDCMD_PRINT) { 5573 (void) printf(dgettext(TEXT_DOMAIN, 5574 "%s: Soft Partition metadb configuration is valid\n"), 5575 compnp->cname); 5576 } 5577 return (0); 5578 } 5579 5580 /* 5581 * FUNCTION: meta_sp_validate_wm_and_unit() 5582 * INPUT: sp - name of set we are recovering in 5583 * compnp - name of device we are recovering from 5584 * options - metarecover options 5585 * OUTPUT: ep - error pointer returned 5586 * RETURNS: int - 0 - success, -1 error 5587 * PURPOSE: cross-validate and display watermarks and metadb records. 5588 * get both the unit structures for the soft partitions built 5589 * on the specified component and the watermarks found on that 5590 * component and check to make sure they are consistent with 5591 * each other. 5592 */ 5593 static int 5594 meta_sp_validate_wm_and_unit( 5595 mdsetname_t *sp, 5596 mdname_t *np, 5597 mdcmdopts_t options, 5598 md_error_t *ep 5599 ) 5600 { 5601 sp_ext_node_t *wmlist = NULL; 5602 sp_ext_node_t *unitlist = NULL; 5603 sp_ext_node_t *unitext; 5604 sp_ext_node_t *wmext; 5605 sp_ext_offset_t tmpunitoff; 5606 mdnamelist_t *spnlp = NULL; 5607 int count; 5608 int rval = 0; 5609 int verbose = (options & MDCMD_VERBOSE); 5610 5611 /* get unit structure list */ 5612 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5613 if (count <= 0) 5614 return (-1); 5615 5616 meta_sp_list_insert(NULL, NULL, &unitlist, 5617 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5618 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5619 5620 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5621 metafreenamelist(spnlp); 5622 return (-1); 5623 } 5624 5625 metafreenamelist(spnlp); 5626 5627 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5628 5629 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5630 meta_sp_cmp_by_offset, ep) < 0) { 5631 meta_sp_list_free(&unitlist); 5632 return (-1); 5633 } 5634 5635 if (getenv(META_SP_DEBUG)) { 5636 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5637 meta_sp_list_dump(unitlist); 5638 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5639 meta_sp_list_dump(wmlist); 5640 } 5641 5642 /* 5643 * step through both lists and compare allocated nodes. Free 5644 * nodes and end watermarks may differ between the two but 5645 * that's generally ok, and if they're wrong will typically 5646 * cause misplaced allocated extents. 5647 */ 5648 if (verbose) 5649 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5650 "allocations match extent headers.\n"), np->cname); 5651 5652 unitext = unitlist; 5653 wmext = wmlist; 5654 while ((wmext != NULL) && (unitext != NULL)) { 5655 /* find next allocated extents in each list */ 5656 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5657 wmext = wmext->ext_next; 5658 5659 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5660 unitext = unitext->ext_next; 5661 5662 if (wmext == NULL || unitext == NULL) 5663 break; 5664 5665 if (verbose) { 5666 (void) printf(dgettext(TEXT_DOMAIN, 5667 "Metadb extent:\n")); 5668 meta_sp_display_exthdr(); 5669 meta_sp_display_ext(unitext); 5670 (void) printf(dgettext(TEXT_DOMAIN, 5671 "Extent header extent:\n")); 5672 meta_sp_display_exthdr(); 5673 meta_sp_display_ext(wmext); 5674 (void) printf("\n"); 5675 } 5676 5677 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5678 rval = -1; 5679 5680 /* 5681 * if the offsets aren't equal, only increment the 5682 * lowest one in hopes of getting the lists back in sync. 5683 */ 5684 tmpunitoff = unitext->ext_offset; 5685 if (unitext->ext_offset <= wmext->ext_offset) 5686 unitext = unitext->ext_next; 5687 if (wmext->ext_offset <= tmpunitoff) 5688 wmext = wmext->ext_next; 5689 } 5690 5691 /* 5692 * if both lists aren't at the end then there are extra 5693 * allocated nodes in one of them. 5694 */ 5695 if (wmext != NULL) { 5696 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5697 "%s: extent headers contain allocations not in " 5698 "the metadb\n\n"), np->cname); 5699 rval = -1; 5700 } 5701 5702 if (unitext != NULL) { 5703 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5704 "%s: metadb contains allocations not in the extent " 5705 "headers\n\n"), np->cname); 5706 rval = -1; 5707 } 5708 5709 if (options & MDCMD_PRINT) { 5710 if (rval == 0) { 5711 (void) printf(dgettext(TEXT_DOMAIN, 5712 "%s: Soft Partition metadb matches extent " 5713 "header configuration\n"), np->cname); 5714 } else { 5715 (void) printf(dgettext(TEXT_DOMAIN, 5716 "%s: Soft Partition metadb does not match extent " 5717 "header configuration\n"), np->cname); 5718 } 5719 } 5720 5721 return (rval); 5722 } 5723 5724 /* 5725 * FUNCTION: meta_sp_validate_exts() 5726 * INPUT: compnp - name pointer for device we are recovering from 5727 * wmext - extent node representing watermark 5728 * unitext - extent node from unit structure 5729 * OUTPUT: ep - return error pointer 5730 * RETURNS: int - 0 - succes, mdmderror return code - error 5731 * PURPOSE: Takes two extent nodes and checks them against each other. 5732 * offset, length, sequence number, set, and name are compared. 5733 */ 5734 static int 5735 meta_sp_validate_exts( 5736 mdname_t *compnp, 5737 sp_ext_node_t *wmext, 5738 sp_ext_node_t *unitext, 5739 md_error_t *ep 5740 ) 5741 { 5742 if (wmext->ext_offset != unitext->ext_offset) { 5743 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5744 "%s: unit structure and extent header offsets differ.\n"), 5745 compnp->cname); 5746 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5747 } 5748 5749 if (wmext->ext_length != unitext->ext_length) { 5750 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5751 "%s: unit structure and extent header lengths differ.\n"), 5752 compnp->cname); 5753 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5754 } 5755 5756 if (wmext->ext_seq != unitext->ext_seq) { 5757 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5758 "%s: unit structure and extent header sequence numbers " 5759 "differ.\n"), compnp->cname); 5760 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5761 } 5762 5763 if (wmext->ext_type != unitext->ext_type) { 5764 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5765 "%s: unit structure and extent header types differ.\n"), 5766 compnp->cname); 5767 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5768 } 5769 5770 /* 5771 * If one has a set pointer and the other doesn't, error. 5772 * If both extents have setnames, then make sure they match 5773 * If both are NULL, it's ok, they match. 5774 */ 5775 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5776 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5777 "%s: unit structure and extent header set values " 5778 "differ.\n"), compnp->cname); 5779 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5780 } 5781 5782 if (unitext->ext_setp != NULL) { 5783 if (strcmp(unitext->ext_setp->setname, 5784 wmext->ext_setp->setname) != 0) { 5785 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5786 "%s: unit structure and extent header set names " 5787 "differ.\n"), compnp->cname); 5788 return (mdmderror(ep, MDE_RECOVER_FAILED, 5789 0, compnp->cname)); 5790 } 5791 } 5792 5793 /* 5794 * If one has a name pointer and the other doesn't, error. 5795 * If both extents have names, then make sure they match 5796 * If both are NULL, it's ok, they match. 5797 */ 5798 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5799 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5800 "%s: unit structure and extent header name values " 5801 "differ.\n"), compnp->cname); 5802 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5803 } 5804 5805 if (unitext->ext_namep != NULL) { 5806 if (strcmp(wmext->ext_namep->cname, 5807 unitext->ext_namep->cname) != 0) { 5808 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5809 "%s: unit structure and extent header names " 5810 "differ.\n"), compnp->cname); 5811 return (mdmderror(ep, MDE_RECOVER_FAILED, 5812 0, compnp->cname)); 5813 } 5814 } 5815 5816 return (0); 5817 } 5818 5819 /* 5820 * FUNCTION: update_sp_status() 5821 * INPUT: sp - name of set we are recovering in 5822 * minors - pointer to an array of soft partition minor numbers 5823 * num_sps - number of minor numbers in array 5824 * status - new status to be applied to all soft parts in array 5825 * mn_set - set if current set is a multi-node set 5826 * OUTPUT: ep - return error pointer 5827 * RETURNS: int - 0 - success, -1 - error 5828 * PURPOSE: update status of soft partitions to new status. minors is an 5829 * array of minor numbers to apply the new status to. 5830 * If mn_set is set, a message is sent to all nodes in the 5831 * cluster to update the status locally. 5832 */ 5833 static int 5834 update_sp_status( 5835 mdsetname_t *sp, 5836 minor_t *minors, 5837 int num_sps, 5838 sp_status_t status, 5839 bool_t mn_set, 5840 md_error_t *ep 5841 ) 5842 { 5843 int i; 5844 int err = 0; 5845 5846 if (mn_set) { 5847 md_mn_msg_sp_setstat_t sp_setstat_params; 5848 int result; 5849 md_mn_result_t *resp = NULL; 5850 5851 for (i = 0; i < num_sps; i++) { 5852 sp_setstat_params.sp_setstat_mnum = minors[i]; 5853 sp_setstat_params.sp_setstat_status = status; 5854 5855 result = mdmn_send_message(sp->setno, 5856 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0, 5857 (char *)&sp_setstat_params, 5858 sizeof (sp_setstat_params), 5859 &resp, ep); 5860 if (resp != NULL) { 5861 if (resp->mmr_exitval != 0) 5862 err = -1; 5863 free_result(resp); 5864 } 5865 if (result != 0) { 5866 err = -1; 5867 } 5868 } 5869 } else { 5870 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5871 err = -1; 5872 } 5873 if (err < 0) { 5874 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5875 "Error updating status on recovered soft " 5876 "partitions.\n")); 5877 } 5878 return (err); 5879 } 5880 5881 /* 5882 * FUNCTION: meta_sp_recover_from_wm() 5883 * INPUT: sp - name of set we are recovering in 5884 * compnp - name pointer for component we are recovering from 5885 * options - metarecover options 5886 * OUTPUT: ep - return error pointer 5887 * RETURNS: int - 0 - success, -1 - error 5888 * PURPOSE: update metadb records to match watermarks. begin by getting 5889 * an extlist representing all soft partitions on the component. 5890 * then build a unit structure for each soft partition. 5891 * notify user of changes, then commit each soft partition to 5892 * the metadb one at a time in the "recovering" state. update 5893 * any watermarks that may need it (to reflect possible name 5894 * changes), and, finally, set the status of all recovered 5895 * partitions to the "OK" state at once. 5896 */ 5897 static int 5898 meta_sp_recover_from_wm( 5899 mdsetname_t *sp, 5900 mdname_t *compnp, 5901 mdcmdopts_t options, 5902 md_error_t *ep 5903 ) 5904 { 5905 sp_ext_node_t *extlist = NULL; 5906 sp_ext_node_t *sp_list = NULL; 5907 sp_ext_node_t *update_list = NULL; 5908 sp_ext_node_t *ext; 5909 sp_ext_node_t *sp_ext; 5910 mp_unit_t *mp; 5911 mp_unit_t **un_array; 5912 int numexts = 0, num_sps = 0, i = 0; 5913 int err = 0; 5914 int not_recovered = 0; 5915 int committed = 0; 5916 sp_ext_length_t sp_length = 0LL; 5917 mdnamelist_t *keynlp = NULL; 5918 mdname_t *np; 5919 mdname_t *new_np; 5920 int new_name; 5921 md_set_params_t set_params; 5922 minor_t *minors = NULL; 5923 char yesno[255]; 5924 char *yes; 5925 bool_t mn_set = 0; 5926 md_set_desc *sd; 5927 mm_unit_t *mm; 5928 md_set_mmown_params_t *ownpar = NULL; 5929 int comp_is_mirror = 0; 5930 5931 /* 5932 * if this component appears in another metadevice already, do 5933 * NOT recover from it. 5934 */ 5935 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5936 return (-1); 5937 5938 /* set flag if dealing with a MN set */ 5939 if (!metaislocalset(sp)) { 5940 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5941 return (-1); 5942 } 5943 if (MD_MNSET_DESC(sd)) 5944 mn_set = 1; 5945 } 5946 /* 5947 * for each watermark, build an ext_node, place on list. 5948 */ 5949 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5950 meta_sp_cmp_by_nameseq, ep) < 0) 5951 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5952 5953 assert(extlist != NULL); 5954 5955 /* count number of soft partitions */ 5956 for (ext = extlist; 5957 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5958 ext = ext->ext_next) { 5959 if (ext->ext_next != NULL && 5960 ext->ext_next->ext_namep != NULL && 5961 strcmp(ext->ext_next->ext_namep->cname, 5962 ext->ext_namep->cname) == 0) 5963 continue; 5964 num_sps++; 5965 } 5966 5967 /* allocate array of unit structure pointers */ 5968 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5969 5970 /* 5971 * build unit structures from list of ext_nodes. 5972 */ 5973 for (ext = extlist; 5974 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5975 ext = ext->ext_next) { 5976 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5977 &sp_list, ext->ext_offset, ext->ext_length, 5978 ext->ext_type, ext->ext_seq, ext->ext_flags, 5979 meta_sp_cmp_by_nameseq); 5980 5981 numexts++; 5982 sp_length += ext->ext_length - MD_SP_WMSIZE; 5983 5984 if (ext->ext_next != NULL && 5985 ext->ext_next->ext_namep != NULL && 5986 strcmp(ext->ext_next->ext_namep->cname, 5987 ext->ext_namep->cname) == 0) 5988 continue; 5989 5990 /* 5991 * if we made it here, we are at a soft partition 5992 * boundary in the list. 5993 */ 5994 if (getenv(META_SP_DEBUG)) { 5995 meta_sp_debug("meta_recover_from_wm: dumping wm " 5996 "list:\n"); 5997 meta_sp_list_dump(sp_list); 5998 } 5999 6000 assert(sp_list != NULL); 6001 assert(sp_list->ext_namep != NULL); 6002 6003 if ((new_name = meta_sp_resolve_name_conflict(sp, 6004 sp_list->ext_namep, &new_np, ep)) < 0) { 6005 err = 1; 6006 goto out; 6007 } else if (new_name) { 6008 for (sp_ext = sp_list; 6009 sp_ext != NULL; 6010 sp_ext = sp_ext->ext_next) { 6011 /* 6012 * insert into the update list for 6013 * watermark update. 6014 */ 6015 meta_sp_list_insert(sp_ext->ext_setp, 6016 new_np, &update_list, sp_ext->ext_offset, 6017 sp_ext->ext_length, sp_ext->ext_type, 6018 sp_ext->ext_seq, EXTFLG_UPDATE, 6019 meta_sp_cmp_by_offset); 6020 } 6021 6022 } 6023 if (options & MDCMD_DOIT) { 6024 /* store name in namespace */ 6025 if (mn_set) { 6026 /* send message to all nodes to return key */ 6027 md_mn_msg_addkeyname_t *send_params; 6028 int result; 6029 md_mn_result_t *resp = NULL; 6030 int message_size; 6031 6032 message_size = sizeof (*send_params) + 6033 strlen(compnp->cname) + 1; 6034 send_params = Zalloc(message_size); 6035 send_params->addkeyname_setno = sp->setno; 6036 (void) strcpy(&send_params->addkeyname_name[0], 6037 compnp->cname); 6038 result = mdmn_send_message(sp->setno, 6039 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6040 0, (char *)send_params, message_size, &resp, 6041 ep); 6042 Free(send_params); 6043 if (resp != NULL) { 6044 if (resp->mmr_exitval >= 0) { 6045 compnp->key = 6046 (mdkey_t)resp->mmr_exitval; 6047 } else { 6048 err = 1; 6049 free_result(resp); 6050 goto out; 6051 } 6052 free_result(resp); 6053 } 6054 if (result != 0) { 6055 err = 1; 6056 goto out; 6057 } 6058 (void) metanamelist_append(&keynlp, compnp); 6059 } else { 6060 if (add_key_name(sp, compnp, &keynlp, 6061 ep) != 0) { 6062 err = 1; 6063 goto out; 6064 } 6065 } 6066 } 6067 6068 /* create the unit structure */ 6069 if ((mp = meta_sp_createunit( 6070 (new_name) ? new_np : sp_list->ext_namep, compnp, 6071 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6072 err = 1; 6073 goto out; 6074 } 6075 6076 if (getenv(META_SP_DEBUG)) { 6077 meta_sp_debug("meta_sp_recover_from_wm: " 6078 "printing newly created unit structure"); 6079 meta_sp_printunit(mp); 6080 } 6081 6082 /* place in unit structure array */ 6083 un_array[i++] = mp; 6084 6085 /* free sp_list */ 6086 meta_sp_list_free(&sp_list); 6087 sp_list = NULL; 6088 numexts = 0; 6089 sp_length = 0LL; 6090 } 6091 6092 /* display configuration updates */ 6093 (void) printf(dgettext(TEXT_DOMAIN, 6094 "The following soft partitions were found and will be added to\n" 6095 "your metadevice configuration.\n")); 6096 (void) printf("%5s %15s %18s\n", 6097 dgettext(TEXT_DOMAIN, "Name"), 6098 dgettext(TEXT_DOMAIN, "Size"), 6099 dgettext(TEXT_DOMAIN, "No. of Extents")); 6100 for (i = 0; i < num_sps; i++) { 6101 (void) printf("%5s%lu %15llu %9d\n", "d", 6102 MD_MIN2UNIT(MD_SID(un_array[i])), 6103 un_array[i]->un_length, un_array[i]->un_numexts); 6104 } 6105 6106 if (!(options & MDCMD_DOIT)) { 6107 not_recovered = 1; 6108 goto out; 6109 } 6110 6111 /* ask user for confirmation */ 6112 (void) printf(dgettext(TEXT_DOMAIN, 6113 "WARNING: You are about to add one or more soft partition\n" 6114 "metadevices to your metadevice configuration. If there\n" 6115 "appears to be an error in the soft partition(s) displayed\n" 6116 "above, do NOT proceed with this recovery operation.\n")); 6117 (void) printf(dgettext(TEXT_DOMAIN, 6118 "Are you sure you want to do this (yes/no)? ")); 6119 6120 (void) fflush(stdout); 6121 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6122 (strlen(yesno) == 1)) 6123 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6124 dgettext(TEXT_DOMAIN, "no")); 6125 yes = dgettext(TEXT_DOMAIN, "yes"); 6126 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6127 not_recovered = 1; 6128 goto out; 6129 } 6130 6131 /* commit records one at a time */ 6132 for (i = 0; i < num_sps; i++) { 6133 (void) memset(&set_params, 0, sizeof (set_params)); 6134 set_params.mnum = MD_SID(un_array[i]); 6135 set_params.size = (un_array[i])->c.un_size; 6136 set_params.mdp = (uintptr_t)(un_array[i]); 6137 set_params.options = 6138 meta_check_devicesize(un_array[i]->un_length); 6139 if (set_params.options == MD_CRO_64BIT) { 6140 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6141 } else { 6142 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6143 } 6144 MD_SETDRIVERNAME(&set_params, MD_SP, 6145 MD_MIN2SET(set_params.mnum)); 6146 6147 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6148 6149 /* 6150 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6151 */ 6152 if (mn_set) { 6153 md_mn_msg_iocset_t send_params; 6154 int result; 6155 md_mn_result_t *resp = NULL; 6156 int mess_size; 6157 6158 /* 6159 * Calculate message size. md_mn_msg_iocset_t only 6160 * contains one extent, so increment the size to 6161 * include all extents 6162 */ 6163 mess_size = sizeof (send_params) - 6164 sizeof (mp_ext_t) + 6165 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6166 6167 send_params.iocset_params = set_params; 6168 (void) memcpy(&send_params.unit, un_array[i], 6169 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6170 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6171 result = mdmn_send_message(sp->setno, 6172 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0, 6173 (char *)&send_params, mess_size, &resp, 6174 ep); 6175 if (resp != NULL) { 6176 if (resp->mmr_exitval != 0) 6177 err = 1; 6178 free_result(resp); 6179 } 6180 if (result != 0) { 6181 err = 1; 6182 } 6183 } else { 6184 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6185 np->cname) != 0) { 6186 err = 1; 6187 } 6188 } 6189 6190 if (err == 1) { 6191 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6192 "%s: Error committing record to metadb.\n"), 6193 np->cname); 6194 goto out; 6195 } 6196 6197 /* note that we've committed a record */ 6198 if (!committed) 6199 committed = 1; 6200 6201 /* update any watermarks that need it */ 6202 if (update_list != NULL) { 6203 md_sp_t *msp; 6204 6205 /* 6206 * Check to see if we're trying to create a partition 6207 * on a mirror. If so we may have to enforce an 6208 * ownership change before writing the watermark out. 6209 */ 6210 if (metaismeta(compnp)) { 6211 char *miscname; 6212 6213 miscname = metagetmiscname(compnp, ep); 6214 if (miscname != NULL) 6215 comp_is_mirror = (strcmp(miscname, 6216 MD_MIRROR) == 0); 6217 else 6218 comp_is_mirror = 0; 6219 } 6220 /* 6221 * If this is a MN set and the component is a mirror, 6222 * change ownership to this node in order to write the 6223 * watermarks 6224 */ 6225 if (mn_set && comp_is_mirror) { 6226 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6227 if (mm == NULL) { 6228 err = 1; 6229 goto out; 6230 } else { 6231 err = meta_mn_change_owner(&ownpar, 6232 sp->setno, 6233 meta_getminor(compnp->dev), 6234 sd->sd_mn_mynode->nd_nodeid, 6235 MD_MN_MM_PREVENT_CHANGE | 6236 MD_MN_MM_SPAWN_THREAD); 6237 if (err != 0) 6238 goto out; 6239 } 6240 } 6241 6242 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6243 err = 1; 6244 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6245 "%s: Error updating extent headers.\n"), 6246 np->cname); 6247 goto out; 6248 } 6249 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6250 err = 1; 6251 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6252 "%s: Error updating extent headers " 6253 "on disk.\n"), np->cname); 6254 goto out; 6255 } 6256 } 6257 /* 6258 * If we have changed ownership earlier and prevented any 6259 * ownership changes, we can now allow ownership changes 6260 * again. 6261 */ 6262 if (ownpar) { 6263 (void) meta_mn_change_owner(&ownpar, sp->setno, 6264 ownpar->d.mnum, 6265 ownpar->d.owner, 6266 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6267 } 6268 } 6269 6270 /* update status of all soft partitions to OK */ 6271 minors = Zalloc(num_sps * sizeof (minor_t)); 6272 for (i = 0; i < num_sps; i++) 6273 minors[i] = MD_SID(un_array[i]); 6274 6275 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6276 if (err != 0) 6277 goto out; 6278 6279 if (options & MDCMD_PRINT) 6280 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6281 "Soft Partitions recovered from device.\n"), 6282 compnp->cname); 6283 out: 6284 /* free memory */ 6285 if (extlist != NULL) 6286 meta_sp_list_free(&extlist); 6287 if (sp_list != NULL) 6288 meta_sp_list_free(&sp_list); 6289 if (update_list != NULL) 6290 meta_sp_list_free(&update_list); 6291 if (un_array != NULL) { 6292 for (i = 0; i < num_sps; i++) 6293 Free(un_array[i]); 6294 Free(un_array); 6295 } 6296 if (minors != NULL) 6297 Free(minors); 6298 if (ownpar != NULL) 6299 Free(ownpar); 6300 (void) fflush(stdout); 6301 6302 if ((keynlp != NULL) && (committed != 1)) { 6303 /* 6304 * if we haven't committed any softparts, either because of an 6305 * error or because the user decided not to proceed, delete 6306 * namelist key for the component 6307 */ 6308 if (mn_set) { 6309 mdnamelist_t *p; 6310 6311 for (p = keynlp; (p != NULL); p = p->next) { 6312 mdname_t *np = p->namep; 6313 md_mn_msg_delkeyname_t send_params; 6314 md_mn_result_t *resp = NULL; 6315 6316 send_params.delkeyname_dev = np->dev; 6317 send_params.delkeyname_setno = sp->setno; 6318 send_params.delkeyname_key = np->key; 6319 (void) mdmn_send_message(sp->setno, 6320 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6321 0, (char *)&send_params, 6322 sizeof (send_params), 6323 &resp, ep); 6324 if (resp != NULL) { 6325 free_result(resp); 6326 } 6327 } 6328 } else { 6329 (void) del_key_names(sp, keynlp, NULL); 6330 } 6331 } 6332 6333 metafreenamelist(keynlp); 6334 6335 if (err) 6336 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6337 6338 if (not_recovered) 6339 if (options & MDCMD_PRINT) 6340 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6341 "Soft Partitions NOT recovered from device.\n"), 6342 compnp->cname); 6343 return (0); 6344 } 6345 6346 /* 6347 * FUNCTION: meta_sp_recover_from_unit() 6348 * INPUT: sp - name of set we are recovering in 6349 * compnp - name of component we are recovering from 6350 * options - metarecover options 6351 * OUTPUT: ep - return error pointer 6352 * RETURNS: int - 0 - success, -1 - error 6353 * PURPOSE: update watermarks to match metadb records. begin by getting 6354 * a namelist representing all soft partitions on the specified 6355 * component. then, build an extlist representing the soft 6356 * partitions, filling in the freespace extents. notify user 6357 * of changes, place all soft partitions into the "recovering" 6358 * state and update the watermarks. finally, return all soft 6359 * partitions to the "OK" state. 6360 */ 6361 static int 6362 meta_sp_recover_from_unit( 6363 mdsetname_t *sp, 6364 mdname_t *compnp, 6365 mdcmdopts_t options, 6366 md_error_t *ep 6367 ) 6368 { 6369 mdnamelist_t *spnlp = NULL; 6370 mdnamelist_t *nlp = NULL; 6371 sp_ext_node_t *ext = NULL; 6372 sp_ext_node_t *extlist = NULL; 6373 int count; 6374 char yesno[255]; 6375 char *yes; 6376 int rval = 0; 6377 minor_t *minors = NULL; 6378 int i; 6379 md_sp_t *msp; 6380 md_set_desc *sd; 6381 bool_t mn_set = 0; 6382 daddr_t start_block; 6383 6384 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6385 if (count <= 0) 6386 return (-1); 6387 6388 /* set flag if dealing with a MN set */ 6389 if (!metaislocalset(sp)) { 6390 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6391 return (-1); 6392 } 6393 if (MD_MNSET_DESC(sd)) 6394 mn_set = 1; 6395 } 6396 /* 6397 * Save the XDR unit structure for one of the soft partitions; 6398 * we'll use this later to provide metadevice context to 6399 * update the watermarks so the device can be resolved by 6400 * devid instead of dev_t. 6401 */ 6402 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6403 metafreenamelist(spnlp); 6404 return (-1); 6405 } 6406 6407 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6408 MD_DISKADDR_ERROR) { 6409 return (-1); 6410 } 6411 6412 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6413 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6414 meta_sp_list_insert(NULL, NULL, &extlist, 6415 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6416 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6417 6418 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6419 metafreenamelist(spnlp); 6420 return (-1); 6421 } 6422 6423 assert(extlist != NULL); 6424 if ((options & MDCMD_VERBOSE) != 0) { 6425 (void) printf(dgettext(TEXT_DOMAIN, 6426 "Updating extent headers on device %s from metadb.\n\n"), 6427 compnp->cname); 6428 (void) printf(dgettext(TEXT_DOMAIN, 6429 "The following extent headers will be written:\n")); 6430 meta_sp_display_exthdr(); 6431 } 6432 6433 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6434 6435 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6436 6437 /* mark every node for updating except the reserved space */ 6438 if (ext->ext_type != EXTTYP_RESERVED) { 6439 ext->ext_flags |= EXTFLG_UPDATE; 6440 6441 /* print extent information */ 6442 if ((options & MDCMD_VERBOSE) != 0) 6443 meta_sp_display_ext(ext); 6444 } 6445 } 6446 6447 /* request verification and then update all watermarks */ 6448 if ((options & MDCMD_DOIT) != 0) { 6449 6450 (void) printf(dgettext(TEXT_DOMAIN, 6451 "\nWARNING: You are about to overwrite portions of %s\n" 6452 "with soft partition metadata. The extent headers will be\n" 6453 "written to match the existing metadb configuration. If\n" 6454 "the device was not previously setup with this\n" 6455 "configuration, data loss may result.\n\n"), 6456 compnp->cname); 6457 (void) printf(dgettext(TEXT_DOMAIN, 6458 "Are you sure you want to do this (yes/no)? ")); 6459 6460 (void) fflush(stdout); 6461 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6462 (strlen(yesno) == 1)) 6463 (void) snprintf(yesno, sizeof (yesno), 6464 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6465 yes = dgettext(TEXT_DOMAIN, "yes"); 6466 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6467 /* place soft partitions into recovering state */ 6468 minors = Zalloc(count * sizeof (minor_t)); 6469 for (nlp = spnlp, i = 0; 6470 nlp != NULL && i < count; 6471 nlp = nlp->next, i++) { 6472 assert(nlp->namep != NULL); 6473 minors[i] = meta_getminor(nlp->namep->dev); 6474 } 6475 if (update_sp_status(sp, minors, count, 6476 MD_SP_RECOVER, mn_set, ep) != 0) { 6477 rval = -1; 6478 goto out; 6479 } 6480 6481 /* update the watermarks */ 6482 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6483 rval = -1; 6484 goto out; 6485 } 6486 6487 if (options & MDCMD_PRINT) { 6488 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6489 "Soft Partitions recovered from metadb\n"), 6490 compnp->cname); 6491 } 6492 6493 /* return soft partitions to the OK state */ 6494 if (update_sp_status(sp, minors, count, 6495 MD_SP_OK, mn_set, ep) != 0) { 6496 rval = -1; 6497 goto out; 6498 } 6499 6500 rval = 0; 6501 goto out; 6502 } 6503 } 6504 6505 if (options & MDCMD_PRINT) { 6506 (void) printf(dgettext(TEXT_DOMAIN, 6507 "%s: Soft Partitions NOT recovered from metadb\n"), 6508 compnp->cname); 6509 } 6510 6511 out: 6512 if (minors != NULL) 6513 Free(minors); 6514 metafreenamelist(spnlp); 6515 meta_sp_list_free(&extlist); 6516 (void) fflush(stdout); 6517 return (rval); 6518 } 6519 6520 6521 /* 6522 * FUNCTION: meta_sp_update_abr() 6523 * INPUT: sp - name of set we are recovering in 6524 * OUTPUT: ep - return error pointer 6525 * RETURNS: int - 0 - success, -1 - error 6526 * PURPOSE: update the ABR state for all soft partitions in the set. This 6527 * is called when joining a set. It sends a message to the master 6528 * node for each soft partition to get the value of tstate and 6529 * then sets ABR ,if required, by opening the sp, setting ABR 6530 * and then closing the sp. This approach is taken rather that 6531 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6532 * the case when we have another node simultaneously unsetting ABR. 6533 */ 6534 int 6535 meta_sp_update_abr( 6536 mdsetname_t *sp, 6537 md_error_t *ep 6538 ) 6539 { 6540 mdnamelist_t *devnlp = NULL; 6541 mdnamelist_t *p; 6542 mdname_t *devnp = NULL; 6543 md_unit_t *un; 6544 char fname[MAXPATHLEN]; 6545 int mnum, fd; 6546 volcap_t vc; 6547 uint_t tstate; 6548 6549 6550 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6551 return (-1); 6552 } 6553 6554 /* Exit if no soft partitions in this set */ 6555 if (devnlp == NULL) 6556 return (0); 6557 6558 /* For each soft partition */ 6559 for (p = devnlp; (p != NULL); p = p->next) { 6560 devnp = p->namep; 6561 6562 /* check if this is a top level metadevice */ 6563 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6564 goto out; 6565 if (MD_HAS_PARENT(MD_PARENT(un))) { 6566 Free(un); 6567 continue; 6568 } 6569 Free(un); 6570 6571 /* Get tstate from Master */ 6572 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6573 mdname_t *np; 6574 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6575 ep); 6576 if (np) { 6577 md_perror(dgettext(TEXT_DOMAIN, 6578 "Unable to get tstate for %s"), np->cname); 6579 } 6580 continue; 6581 } 6582 /* If not set on the master, nothing to do */ 6583 if (!(tstate & MD_ABR_CAP)) 6584 continue; 6585 6586 mnum = meta_getminor(devnp->dev); 6587 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6588 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6589 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6590 md_perror(dgettext(TEXT_DOMAIN, 6591 "Could not open device %s"), fname); 6592 continue; 6593 } 6594 6595 /* Set ABR state */ 6596 vc.vc_info = 0; 6597 vc.vc_set = 0; 6598 if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) { 6599 (void) close(fd); 6600 continue; 6601 } 6602 6603 vc.vc_set = DKV_ABR_CAP; 6604 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6605 (void) close(fd); 6606 goto out; 6607 } 6608 6609 (void) close(fd); 6610 } 6611 metafreenamelist(devnlp); 6612 return (0); 6613 out: 6614 metafreenamelist(devnlp); 6615 return (-1); 6616 } 6617 6618 /* 6619 * FUNCTION: meta_mn_sp_update_abr() 6620 * INPUT: arg - Given set. 6621 * PURPOSE: update the ABR state for all soft partitions in the set by 6622 * forking a process to call meta_sp_update_abr() 6623 * This function is only called via rpc.metad when adding a node 6624 * to a set, ie this node is beong joined to the set by another 6625 * node. 6626 */ 6627 void * 6628 meta_mn_sp_update_abr(void *arg) 6629 { 6630 set_t setno = *((set_t *)arg); 6631 mdsetname_t *sp; 6632 md_error_t mde = mdnullerror; 6633 int fval; 6634 6635 /* should have a set */ 6636 assert(setno != NULL); 6637 6638 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6639 mde_perror(&mde, ""); 6640 return (NULL); 6641 } 6642 6643 if (!(meta_is_mn_set(sp, &mde))) { 6644 mde_perror(&mde, ""); 6645 return (NULL); 6646 } 6647 6648 /* fork a process */ 6649 if ((fval = md_daemonize(sp, &mde)) != 0) { 6650 /* 6651 * md_daemonize will fork off a process. The is the 6652 * parent or error. 6653 */ 6654 if (fval > 0) { 6655 return (NULL); 6656 } 6657 mde_perror(&mde, ""); 6658 return (NULL); 6659 } 6660 /* 6661 * Child process should never return back to rpc.metad, but 6662 * should exit. 6663 * Flush all internally cached data inherited from parent process 6664 * since cached data will be cleared when parent process RPC request 6665 * has completed (which is possibly before this child process 6666 * can complete). 6667 * Child process can retrieve and cache its own copy of data from 6668 * rpc.metad that won't be changed by the parent process. 6669 * 6670 * Reset md_in_daemon since this child will be a client of rpc.metad 6671 * not part of the rpc.metad daemon itself. 6672 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6673 * this thread is rpc.metad or any other thread. (If this thread 6674 * was rpc.metad it could use some short circuit code to get data 6675 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6676 */ 6677 md_in_daemon = 0; 6678 metaflushsetname(sp); 6679 sr_cache_flush_setno(setno); 6680 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6681 mde_perror(&mde, ""); 6682 md_exit(sp, 1); 6683 } 6684 6685 6686 /* 6687 * Closing stdin/out/err here. 6688 */ 6689 (void) close(0); 6690 (void) close(1); 6691 (void) close(2); 6692 assert(fval == 0); 6693 6694 (void) meta_sp_update_abr(sp, &mde); 6695 6696 md_exit(sp, 0); 6697 /*NOTREACHED*/ 6698 return (NULL); 6699 } 6700 6701 int 6702 meta_sp_check_component( 6703 mdsetname_t *sp, 6704 mdname_t *np, 6705 md_error_t *ep 6706 ) 6707 { 6708 md_sp_t *msp; 6709 minor_t mnum = 0; 6710 md_dev64_t dev = 0; 6711 mdnm_params_t nm; 6712 md_getdevs_params_t mgd; 6713 side_t sideno; 6714 char *miscname; 6715 md_dev64_t *mydev = NULL; 6716 char *pname = NULL, *t; 6717 char *ctd_name = NULL; 6718 char *devname = NULL; 6719 int len; 6720 int rval = -1; 6721 6722 (void) memset(&nm, '\0', sizeof (nm)); 6723 if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL) 6724 return (-1); 6725 6726 if ((miscname = metagetmiscname(np, ep)) == NULL) 6727 return (-1); 6728 6729 sideno = getmyside(sp, ep); 6730 6731 meta_sp_debug("meta_sp_check_component: %s is on %s key: %d" 6732 " dev: %llu\n", 6733 np->cname, msp->compnamep->cname, msp->compnamep->key, 6734 msp->compnamep->dev); 6735 6736 /* 6737 * Now get the data from the unit structure. The compnamep stuff 6738 * contains the data from the namespace and we need the un_dev 6739 * from the unit structure. 6740 */ 6741 (void) memset(&mgd, '\0', sizeof (mgd)); 6742 MD_SETDRIVERNAME(&mgd, miscname, sp->setno); 6743 mgd.cnt = 1; /* sp's only have one subdevice */ 6744 mgd.mnum = meta_getminor(np->dev); 6745 6746 mydev = Zalloc(sizeof (*mydev)); 6747 mgd.devs = (uintptr_t)mydev; 6748 6749 if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) { 6750 meta_sp_debug("meta_sp_check_component: ioctl failed\n"); 6751 (void) mdstealerror(ep, &mgd.mde); 6752 rval = 0; 6753 goto out; 6754 } else if (mgd.cnt <= 0) { 6755 assert(mgd.cnt >= 0); 6756 rval = 0; 6757 goto out; 6758 } 6759 6760 /* Get the devname from the name space. */ 6761 if ((devname = meta_getnmentbykey(sp->setno, sideno, 6762 msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) { 6763 meta_sp_debug("meta_sp_check_component: key %d not" 6764 "found\n", msp->compnamep->key); 6765 goto out; 6766 } 6767 6768 meta_sp_debug("dev %s from component: (%lu, %lu)\n", 6769 devname, 6770 meta_getmajor(*mydev), 6771 meta_getminor(*mydev)); 6772 meta_sp_debug("minor from the namespace: %lu\n", mnum); 6773 6774 if (mnum != meta_getminor(*mydev)) { 6775 /* 6776 * The minor numbers are different. Update the namespace 6777 * with the information from the component. 6778 */ 6779 6780 t = strrchr(devname, '/'); 6781 t++; 6782 ctd_name = Strdup(t); 6783 6784 meta_sp_debug("meta_sp_check_component: ctd_name: %s\n", 6785 ctd_name); 6786 6787 len = strlen(devname); 6788 t = strrchr(devname, '/'); 6789 t++; 6790 pname = Zalloc((len - strlen(t)) + 1); 6791 (void) strncpy(pname, devname, (len - strlen(t))); 6792 meta_sp_debug("pathname: %s\n", pname); 6793 6794 meta_sp_debug("updating the minor number to %lu\n", nm.mnum); 6795 6796 if (meta_update_namespace(sp->setno, sideno, 6797 ctd_name, *mydev, msp->compnamep->key, pname, 6798 ep) != 0) { 6799 goto out; 6800 } 6801 } 6802 out: 6803 if (pname != NULL) 6804 Free(pname); 6805 if (ctd_name != NULL) 6806 Free(ctd_name); 6807 if (devname != NULL) 6808 Free(devname); 6809 if (mydev != NULL) 6810 Free(mydev); 6811 return (rval); 6812 } 6813