1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Just in case we're not in a build environment, make sure that 30 * TEXT_DOMAIN gets set to something. 31 */ 32 #if !defined(TEXT_DOMAIN) 33 #define TEXT_DOMAIN "SYS_TEST" 34 #endif 35 36 /* 37 * soft partition operations 38 * 39 * Soft Partitions provide a virtual disk mechanism which is used to 40 * divide a large volume into many small pieces, each appearing as a 41 * separate device. A soft partition consists of a series of extents, 42 * each having an offset and a length. The extents are logically 43 * contiguous, so where the first extent leaves off the second extent 44 * picks up. Which extent a given "virtual offset" belongs to is 45 * dependent on the size of all the previous extents in the soft 46 * partition. 47 * 48 * Soft partitions are represented in memory by an extent node 49 * (sp_ext_node_t) which contains all of the information necessary to 50 * create a unit structure and update the on-disk format, called 51 * "watermarks". 
These extent nodes are typically kept in a doubly 52 * linked list and are manipulated by list manipulation routines. A 53 * list of extents may represent all of the soft partitions on a volume, 54 * a single soft partition, or perhaps just a set of extents that need 55 * to be updated. Extent lists may be sorted by extent or by name/seq#, 56 * depending on which compare function is used. Most of the routines 57 * require the list be sorted by offset to work, and that's the typical 58 * configuration. 59 * 60 * In order to do an allocation, knowledge of all soft partitions on the 61 * volume is required. Then free space is determined from the space 62 * that is not allocated, and new allocations can be made from the free 63 * space. Once the new allocations are made, a unit structure is created 64 * and the watermarks are updated. The status is then changed to "okay" 65 * on the unit structure to commit the transaction. If updating the 66 * watermarks fails, the unit structure is in an intermediate state and 67 * the driver will not allow access to the device. 68 * 69 * A typical sequence of events is: 70 * 1. Fetch the list of names for all soft partitions on a volume 71 * meta_sp_get_by_component() 72 * 2. Construct an extent list from the name list 73 * meta_sp_extlist_from_namelist() 74 * 3. Fill the gaps in the extent list with free extents 75 * meta_sp_list_freefill() 76 * 4. Allocate from the free extents 77 * meta_sp_alloc_by_len() 78 * meta_sp_alloc_by_list() 79 * 5. Create the unit structure from the extent list 80 * meta_sp_createunit() 81 * meta_sp_updateunit() 82 * 6. Write out the watermarks 83 * meta_sp_update_wm() 84 * 7. 
Set the status to "Okay" 85 * meta_sp_setstatus() 86 * 87 */ 88 89 #include <stdio.h> 90 #include <meta.h> 91 #include "meta_repartition.h" 92 #include <sys/lvm/md_sp.h> 93 #include <sys/lvm/md_crc.h> 94 #include <strings.h> 95 #include <sys/lvm/md_mirror.h> 96 #include <sys/bitmap.h> 97 98 extern int md_in_daemon; 99 100 typedef struct sp_ext_node { 101 struct sp_ext_node *ext_next; /* next element */ 102 struct sp_ext_node *ext_prev; /* previous element */ 103 sp_ext_type_t ext_type; /* type of extent */ 104 sp_ext_offset_t ext_offset; /* starting offset */ 105 sp_ext_length_t ext_length; /* length of this node */ 106 uint_t ext_flags; /* extent flags */ 107 uint32_t ext_seq; /* watermark seq no */ 108 mdname_t *ext_namep; /* name pointer */ 109 mdsetname_t *ext_setp; /* set pointer */ 110 } sp_ext_node_t; 111 112 /* extent flags */ 113 #define EXTFLG_UPDATE (1) 114 115 /* Extent node compare function for list sorting */ 116 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 117 118 119 /* Function Prototypes */ 120 121 /* Debugging Functions */ 122 static void meta_sp_debug(char *format, ...); 123 static void meta_sp_printunit(mp_unit_t *mp); 124 125 /* Misc Support Functions */ 126 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 127 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 128 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 129 md_error_t *ep); 130 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 131 mdnamelist_t **nlpp, int force, md_error_t *ep); 132 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 133 mdname_t *compnp, md_error_t *ep); 134 135 /* Extent List Manipulation Functions */ 136 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 137 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 138 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 139 sp_ext_node_t **head, sp_ext_offset_t 
offset, sp_ext_length_t length, 140 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 141 static void meta_sp_list_free(sp_ext_node_t **head); 142 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 143 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 144 sp_ext_type_t exttype, int exclude_wm); 145 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 146 sp_ext_offset_t offset); 147 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 148 sp_ext_length_t size); 149 static void meta_sp_list_dump(sp_ext_node_t *head); 150 static int meta_sp_list_overlaps(sp_ext_node_t *head); 151 152 /* Extent List Query Functions */ 153 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 154 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 155 sp_ext_length_t alignment); 156 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 157 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 158 md_error_t *ep); 159 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 160 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 161 162 163 /* Extent Allocation Functions */ 164 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 165 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 166 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 167 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 168 sp_ext_node_t **extlist, sp_ext_length_t *lp, 169 sp_ext_offset_t last_off, sp_ext_length_t alignment); 170 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 171 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 172 173 /* Extent List Population Functions */ 174 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 175 sp_ext_node_t **extlist, md_error_t *ep); 176 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 177 sp_ext_node_t **extlist, ext_cmpfunc_t compare, 
md_error_t *ep); 178 179 /* Print (metastat) Functions */ 180 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 181 mdprtopts_t options, md_error_t *ep); 182 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 183 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 184 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 185 186 /* Watermark Manipulation Functions */ 187 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 188 sp_ext_node_t *extlist, md_error_t *ep); 189 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 190 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 191 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 192 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 193 md_error_t *ep); 194 195 /* Unit Structure Manipulation Functions */ 196 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 197 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 198 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 199 sp_status_t status, md_error_t *ep); 200 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 201 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 202 md_error_t *ep); 203 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 204 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 205 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 206 int *repart_options, md_error_t *ep); 207 208 /* Reset (metaclear) Functions */ 209 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 210 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 211 212 /* Recovery (metarecover) Functions */ 213 static void meta_sp_display_exthdr(void); 214 static void meta_sp_display_ext(sp_ext_node_t *ext); 215 static int meta_sp_checkseq(sp_ext_node_t 
*extlist); 216 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 217 mdname_t **, md_error_t *); 218 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 219 mdcmdopts_t options, md_error_t *ep); 220 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 221 mdcmdopts_t options, md_error_t *ep); 222 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 223 mdcmdopts_t options, md_error_t *ep); 224 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 225 sp_ext_node_t *unitext, md_error_t *ep); 226 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 227 mdcmdopts_t options, md_error_t *ep); 228 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 229 mdcmdopts_t options, md_error_t *ep); 230 231 /* 232 * Private Constants 233 */ 234 235 static const int FORCE_RELOAD_CACHE = 1; 236 static const uint_t NO_FLAGS = 0; 237 static const sp_ext_offset_t NO_OFFSET = 0ULL; 238 static const uint_t NO_SEQUENCE_NUMBER = 0; 239 static const int ONE_SOFT_PARTITION = 1; 240 241 static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)]; 242 243 #define TEST_SOFT_PARTITION_NAMEP NULL 244 #define TEST_SETNAMEP NULL 245 246 #define EXCLUDE_WM (1) 247 #define INCLUDE_WM (0) 248 249 #define SP_UNALIGNED (0LL) 250 251 /* 252 * ************************************************************************** 253 * Debugging Functions * 254 * ************************************************************************** 255 */ 256 257 /*PRINTFLIKE1*/ 258 static void 259 meta_sp_debug(char *format, ...) 260 { 261 static int debug; 262 static int debug_set = 0; 263 va_list ap; 264 265 if (!debug_set) { 266 debug = getenv(META_SP_DEBUG) ? 
1 : 0; 267 debug_set = 1; 268 } 269 270 if (debug) { 271 va_start(ap, format); 272 (void) vfprintf(stderr, format, ap); 273 va_end(ap); 274 } 275 } 276 277 static void 278 meta_sp_printunit(mp_unit_t *mp) 279 { 280 int i; 281 282 if (mp == NULL) 283 return; 284 285 /* print the common fields we know about */ 286 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 287 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 288 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 289 290 /* sp-specific fields */ 291 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 292 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 293 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 294 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 295 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 296 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 297 298 /* print extent information */ 299 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 300 for (i = 0; i < mp->un_numexts; i++) { 301 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 302 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 303 mp->un_ext[i].un_len); 304 } 305 } 306 307 /* 308 * FUNCTION: meta_sp_parsesize() 309 * INPUT: s - the string to parse 310 * OUTPUT: *szp - disk block count (0 for "all") 311 * RETURNS: -1 for error, 0 for success 312 * PURPOSE: parses the command line parameter that specifies the 313 * requested size of a soft partition. The input string 314 * is either the literal "all" or a numeric value 315 * followed by a single character, b for disk blocks, k 316 * for kilobytes, m for megabytes, g for gigabytes, or t 317 * for terabytes. p for petabytes and e for exabytes 318 * have been added as undocumented features for future 319 * expansion. For example, 100m is 100 megabytes, while 320 * 50g is 50 gigabytes. All values are rounded up to the 321 * nearest block size. 
322 */ 323 int 324 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 325 { 326 if (s == NULL || szp == NULL) { 327 return (-1); 328 } 329 330 /* Check for literal "all" */ 331 if (strcasecmp(s, "all") == 0) { 332 *szp = 0; 333 return (0); 334 } 335 336 return (meta_sp_parsesizestring(s, szp)); 337 } 338 339 /* 340 * FUNCTION: meta_sp_parsesizestring() 341 * INPUT: s - the string to parse 342 * OUTPUT: *szp - disk block count 343 * RETURNS: -1 for error, 0 for success 344 * PURPOSE: parses a string that specifies size. The input string is a 345 * numeric value followed by a single character, b for disk blocks, 346 * k for kilobytes, m for megabytes, g for gigabytes, or t for 347 * terabytes. p for petabytes and e for exabytes have been added 348 * as undocumented features for future expansion. For example, 349 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 350 * are rounded up to the nearest block size. 351 */ 352 static int 353 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 354 { 355 sp_ext_length_t len = 0; 356 char len_type[2]; 357 358 if (s == NULL || szp == NULL) { 359 return (-1); 360 } 361 362 /* 363 * make sure block offset does not overflow 2^64 bytes. 
364 */ 365 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 366 (len == 0LL) || 367 (len > (1LL << (64 - DEV_BSHIFT)))) 368 return (-1); 369 370 switch (len_type[0]) { 371 case 'B': 372 case 'b': 373 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 374 break; 375 case 'K': 376 case 'k': 377 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 378 break; 379 case 'M': 380 case 'm': 381 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 382 break; 383 case 'g': 384 case 'G': 385 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 386 break; 387 case 't': 388 case 'T': 389 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 390 DEV_BSIZE)); 391 break; 392 case 'p': 393 case 'P': 394 len = lbtodb(roundup( 395 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 396 DEV_BSIZE)); 397 break; 398 case 'e': 399 case 'E': 400 len = lbtodb(roundup( 401 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 402 DEV_BSIZE)); 403 break; 404 default: 405 /* error */ 406 return (-1); 407 } 408 409 *szp = len; 410 return (0); 411 } 412 413 /* 414 * FUNCTION: meta_sp_setgeom() 415 * INPUT: np - the underlying device to setup geometry for 416 * compnp - the underlying device to setup geometry for 417 * mp - the unit structure to set the geometry for 418 * OUTPUT: ep - return error pointer 419 * RETURNS: int - -1 if error, 0 otherwise 420 * PURPOSE: establishes geometry information for a device 421 */ 422 static int 423 meta_sp_setgeom( 424 mdname_t *np, 425 mdname_t *compnp, 426 mp_unit_t *mp, 427 md_error_t *ep 428 ) 429 { 430 mdgeom_t *geomp; 431 uint_t round_cyl = 0; 432 433 if ((geomp = metagetgeom(compnp, ep)) == NULL) 434 return (-1); 435 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 436 geomp->read_reinstruct, round_cyl, ep) != 0) 437 return (-1); 438 439 return (0); 440 } 441 442 /* 443 * FUNCTION: meta_sp_setstatus() 444 * INPUT: sp - the set name for the devices to set the status on 445 * minors - an array of minor 
numbers of devices to set status on 446 * num_units - number of entries in the array 447 * status - status value to set all units to 448 * OUTPUT: ep - return error pointer 449 * RETURNS: int - -1 if error, 0 success 450 * PURPOSE: sets the status of one or more soft partitions to the 451 * requested value 452 */ 453 int 454 meta_sp_setstatus( 455 mdsetname_t *sp, 456 minor_t *minors, 457 int num_units, 458 sp_status_t status, 459 md_error_t *ep 460 ) 461 { 462 md_sp_statusset_t status_params; 463 464 assert(minors != NULL); 465 466 /* update status of all soft partitions to the status passed in */ 467 (void) memset(&status_params, 0, sizeof (status_params)); 468 status_params.num_units = num_units; 469 status_params.new_status = status; 470 status_params.size = num_units * sizeof (minor_t); 471 status_params.minors = (uintptr_t)minors; 472 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 473 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 474 NULL) != 0) { 475 (void) mdstealerror(ep, &status_params.mde); 476 return (-1); 477 } 478 return (0); 479 } 480 481 /* 482 * FUNCTION: meta_get_sp_names() 483 * INPUT: sp - the set name to get soft partitions from 484 * options - options from the command line 485 * OUTPUT: nlpp - list of all soft partition names 486 * ep - return error pointer 487 * RETURNS: int - -1 if error, 0 success 488 * PURPOSE: returns a list of all soft partitions in the metadb 489 * for all devices in the specified set 490 */ 491 int 492 meta_get_sp_names( 493 mdsetname_t *sp, 494 mdnamelist_t **nlpp, 495 int options, 496 md_error_t *ep 497 ) 498 { 499 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 500 } 501 502 /* 503 * FUNCTION: meta_get_by_component() 504 * INPUT: sp - the set name to get soft partitions from 505 * compnp - the name of the device containing the soft 506 * partitions that will be returned 507 * force - 0 - reads cached namelist if available, 508 * 1 - reloads cached namelist, frees old namelist 509 * 
OUTPUT: nlpp - list of all soft partition names 510 * ep - return error pointer 511 * RETURNS: int - -1 error, otherwise the number of soft partitions 512 * found on the component (0 = none found). 513 * PURPOSE: returns a list of all soft partitions on a given device 514 * from the metadb information 515 */ 516 static int 517 meta_sp_get_by_component( 518 mdsetname_t *sp, 519 mdname_t *compnp, 520 mdnamelist_t **nlpp, 521 int force, 522 md_error_t *ep 523 ) 524 { 525 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 526 static int cached_count = 0; /* cached count */ 527 mdnamelist_t *spnlp = NULL; /* all sp names */ 528 mdnamelist_t *namep; /* list iterator */ 529 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 530 mdnamelist_t **cachetailpp; /* cache tail */ 531 md_sp_t *msp; /* unit structure */ 532 int count = 0; /* count of sp's */ 533 int err; 534 mdname_t *curnp; 535 536 if ((cached_list != NULL) && (!force)) { 537 /* return a copy of the cached list */ 538 for (namep = cached_list; namep != NULL; namep = namep->next) 539 tailpp = meta_namelist_append_wrapper(tailpp, 540 namep->namep); 541 return (cached_count); 542 } 543 544 /* free the cache and reset values to zeros to prepare for a new list */ 545 metafreenamelist(cached_list); 546 cached_count = 0; 547 cached_list = NULL; 548 cachetailpp = &cached_list; 549 *nlpp = NULL; 550 551 /* get all the softpartitions first of all */ 552 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 553 return (-1); 554 555 /* 556 * Now for each sp, see if it resides on the component we 557 * are interested in, if so then add it to our list 558 */ 559 for (namep = spnlp; namep != NULL; namep = namep->next) { 560 curnp = namep->namep; 561 562 /* get the unit structure */ 563 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 564 continue; 565 566 /* 567 * If the current soft partition is not on the same 568 * component, continue the search. If it is on the same 569 * component, add it to our namelist. 
570 */ 571 err = meta_check_samedrive(compnp, msp->compnamep, ep); 572 if (err <= 0) { 573 /* not on the same device, check the next one */ 574 continue; 575 } 576 577 /* it's on the same drive */ 578 579 /* 580 * Check for overlapping partitions if the component is not 581 * a metadevice. 582 */ 583 if (!metaismeta(msp->compnamep)) { 584 /* 585 * if they're on the same drive, neither 586 * should be a metadevice if one isn't 587 */ 588 assert(!metaismeta(compnp)); 589 590 if (meta_check_overlap(msp->compnamep->cname, 591 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 592 continue; 593 594 /* in this case it's not an error for them to overlap */ 595 mdclrerror(ep); 596 } 597 598 /* Component is on the same device, add to the used list */ 599 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 600 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 601 curnp); 602 603 ++count; 604 ++cached_count; 605 } 606 607 assert(count == cached_count); 608 return (count); 609 610 out: 611 metafreenamelist(*nlpp); 612 *nlpp = NULL; 613 return (-1); 614 } 615 616 /* 617 * FUNCTION: meta_sp_get_default_alignment() 618 * INPUT: sp - the pertinent set name 619 * compnp - the name of the underlying component 620 * OUTPUT: ep - return error pointer 621 * RETURNS: sp_ext_length_t =0: no default alignment 622 * >0: default alignment 623 * PURPOSE: returns the default alignment for soft partitions to 624 * be built on top of the specified component or 625 * metadevice 626 */ 627 static sp_ext_length_t 628 meta_sp_get_default_alignment( 629 mdsetname_t *sp, 630 mdname_t *compnp, 631 md_error_t *ep 632 ) 633 { 634 sp_ext_length_t a = SP_UNALIGNED; 635 char *mname; 636 637 assert(compnp != NULL); 638 639 /* 640 * We treat raw devices as opaque, and assume nothing about 641 * their alignment requirements. 
642 */ 643 if (!metaismeta(compnp)) 644 return (SP_UNALIGNED); 645 646 /* 647 * We already know it's a metadevice from the previous test; 648 * metagetmiscname() will tell us which metadevice type we 649 * have 650 */ 651 mname = metagetmiscname(compnp, ep); 652 if (mname == NULL) 653 goto out; 654 655 /* 656 * For a mirror, we want to deal with the stripe that is the 657 * primary side. If it happens to be asymmetrically 658 * configured, there is no simple way to fake a universal 659 * alignment. There's a chance that the least common 660 * denominator of the set of interlaces from all stripes of 661 * all submirrors would do it, but nobody that really cared 662 * that much about this issue would create an asymmetric 663 * config to start with. 664 * 665 * If the component underlying the soft partition is a mirror, 666 * then at the exit of this loop, compnp will have been 667 * updated to describe the first active submirror. 668 */ 669 if (strcmp(mname, MD_MIRROR) == 0) { 670 md_mirror_t *mp; 671 int smi; 672 md_submirror_t *smp; 673 674 mp = meta_get_mirror(sp, compnp, ep); 675 if (mp == NULL) 676 goto out; 677 678 for (smi = 0; smi < NMIRROR; smi++) { 679 680 smp = &mp->submirrors[smi]; 681 if (smp->state == SMS_UNUSED) 682 continue; 683 684 compnp = smp->submirnamep; 685 assert(compnp != NULL); 686 687 mname = metagetmiscname(compnp, ep); 688 if (mname == NULL) 689 goto out; 690 691 break; 692 } 693 694 if (smi == NMIRROR) 695 goto out; 696 } 697 698 /* 699 * Handle stripes and submirrors identically; just return the 700 * interlace of the first row. 701 */ 702 if (strcmp(mname, MD_STRIPE) == 0) { 703 md_stripe_t *stp; 704 705 stp = meta_get_stripe(sp, compnp, ep); 706 if (stp == NULL) 707 goto out; 708 709 a = stp->rows.rows_val[0].interlace; 710 goto out; 711 } 712 713 /* 714 * Raid is even more straightforward; the interlace applies to 715 * the entire device. 
716 */ 717 if (strcmp(mname, MD_RAID) == 0) { 718 md_raid_t *rp; 719 720 rp = meta_get_raid(sp, compnp, ep); 721 if (rp == NULL) 722 goto out; 723 724 a = rp->interlace; 725 goto out; 726 } 727 728 /* 729 * If we have arrived here with the alignment still not set, 730 * then we expect the error to have been set by one of the 731 * routines we called. If neither is the case, something has 732 * really gone wrong above. (Probably the submirror walk 733 * failed to produce a valid submirror, but that would be 734 * really bad...) 735 */ 736 out: 737 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 738 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 739 740 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 741 mde_perror(ep, NULL); 742 } 743 744 assert((a > 0) || (!mdisok(ep))); 745 746 return (a); 747 } 748 749 750 751 /* 752 * FUNCTION: meta_check_insp() 753 * INPUT: sp - the set name for the device to check 754 * np - the name of the device to check 755 * slblk - the starting offset of the device to check 756 * nblks - the number of blocks in the device to check 757 * OUTPUT: ep - return error pointer 758 * RETURNS: int - 0 - device contains soft partitions 759 * -1 - device does not contain soft partitions 760 * PURPOSE: determines whether a device contains any soft partitions 761 */ 762 /* ARGSUSED */ 763 int 764 meta_check_insp( 765 mdsetname_t *sp, 766 mdname_t *np, 767 diskaddr_t slblk, 768 diskaddr_t nblks, 769 md_error_t *ep 770 ) 771 { 772 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 773 int count; 774 int rval; 775 776 /* check set pointer */ 777 assert(sp != NULL); 778 779 /* 780 * Get a list of the soft partitions that currently reside on 781 * the component. We should ALWAYS force reload the cache, 782 * because if we're using the md.tab, we must rebuild 783 * the list because it won't contain the previous (if any) 784 * soft partition. 
785 */ 786 /* find all soft partitions on the component */ 787 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 788 789 if (count == -1) { 790 rval = -1; 791 } else if (count > 0) { 792 rval = mduseerror(ep, MDE_ALREADY, np->dev, 793 spnlp->namep->cname, np->cname); 794 } else { 795 rval = 0; 796 } 797 798 metafreenamelist(spnlp); 799 return (rval); 800 } 801 802 /* 803 * ************************************************************************** 804 * Extent List Manipulation Functions * 805 * ************************************************************************** 806 */ 807 808 /* 809 * FUNCTION: meta_sp_cmp_by_nameseq() 810 * INPUT: e1 - first node to compare 811 * e2 - second node to compare 812 * OUTPUT: none 813 * RETURNS: int - =0 - nodes are equal 814 * <0 - e1 should go before e2 815 * >0 - e1 should go after e2 816 * PURPOSE: used for sorted list inserts to build a list sorted by 817 * name first and sequence number second. 818 */ 819 static int 820 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 821 { 822 int rval; 823 824 if (e1->ext_namep == NULL) 825 return (1); 826 if (e2->ext_namep == NULL) 827 return (-1); 828 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 829 return (rval); 830 831 /* the names are equal, compare sequence numbers */ 832 if (e1->ext_seq > e2->ext_seq) 833 return (1); 834 if (e1->ext_seq < e2->ext_seq) 835 return (-1); 836 /* sequence numbers are also equal */ 837 return (0); 838 } 839 840 /* 841 * FUNCTION: meta_sp_cmp_by_offset() 842 * INPUT: e1 - first node to compare 843 * e2 - second node to compare 844 * OUTPUT: none 845 * RETURNS: int - =0 - nodes are equal 846 * <0 - e1 should go before e2 847 * >0 - e1 should go after e2 848 * PURPOSE: used for sorted list inserts to build a list sorted by offset 849 */ 850 static int 851 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 852 { 853 if (e1->ext_offset > e2->ext_offset) 854 return (1); 855 if (e1->ext_offset < 
e2->ext_offset) 856 return (-1); 857 /* offsets are equal */ 858 return (0); 859 } 860 861 /* 862 * FUNCTION: meta_sp_list_insert() 863 * INPUT: sp - the set name for the device the node belongs to 864 * np - the name of the device the node belongs to 865 * head - the head of the list, must be NULL for empty list 866 * offset - the physical offset of this extent in sectors 867 * length - the length of this extent in sectors 868 * type - the type of the extent being inserted 869 * seq - the sequence number of the extent being inserted 870 * flags - extent flags (eg. whether it needs to be updated) 871 * compare - the compare function to use 872 * OUTPUT: head - points to the new head if a node was inserted 873 * at the beginning 874 * RETURNS: void 875 * PURPOSE: inserts an extent node into a sorted doubly linked list. 876 * The sort order is determined by the compare function. 877 * Memory is allocated for the node in this function and it 878 * is up to the caller to free it, possibly using 879 * meta_sp_list_free(). If a node is inserted at the 880 * beginning of the list, the head pointer is updated to 881 * point to the new first node. 
882 */ 883 static void 884 meta_sp_list_insert( 885 mdsetname_t *sp, 886 mdname_t *np, 887 sp_ext_node_t **head, 888 sp_ext_offset_t offset, 889 sp_ext_length_t length, 890 sp_ext_type_t type, 891 uint_t seq, 892 uint_t flags, 893 ext_cmpfunc_t compare 894 ) 895 { 896 sp_ext_node_t *newext; 897 sp_ext_node_t *curext; 898 899 assert(head != NULL); 900 901 /* Don't bother adding zero length nodes */ 902 if (length == 0ULL) 903 return; 904 905 /* allocate and fill in new ext_node */ 906 newext = Zalloc(sizeof (sp_ext_node_t)); 907 908 newext->ext_offset = offset; 909 newext->ext_length = length; 910 newext->ext_flags = flags; 911 newext->ext_type = type; 912 newext->ext_seq = seq; 913 newext->ext_setp = sp; 914 newext->ext_namep = np; 915 916 /* first node in the list */ 917 if (*head == NULL) { 918 newext->ext_next = newext->ext_prev = NULL; 919 *head = newext; 920 } else if ((*compare)(*head, newext) >= 0) { 921 /* the first node has a bigger offset, so insert before it */ 922 assert((*head)->ext_prev == NULL); 923 924 newext->ext_prev = NULL; 925 newext->ext_next = *head; 926 (*head)->ext_prev = newext; 927 *head = newext; 928 } else { 929 /* 930 * find the next node whose offset is greater than 931 * the one we want to insert, or the end of the list. 
932 */ 933 for (curext = *head; 934 (curext->ext_next != NULL) && 935 ((*compare)(curext->ext_next, newext) < 0); 936 (curext = curext->ext_next)) 937 ; 938 939 /* link the new node in after the current node */ 940 newext->ext_next = curext->ext_next; 941 newext->ext_prev = curext; 942 943 if (curext->ext_next != NULL) 944 curext->ext_next->ext_prev = newext; 945 946 curext->ext_next = newext; 947 } 948 } 949 950 /* 951 * FUNCTION: meta_sp_list_free() 952 * INPUT: head - the head of the list, must be NULL for empty list 953 * OUTPUT: head - points to NULL on return 954 * RETURNS: void 955 * PURPOSE: walks a double linked extent list and frees each node 956 */ 957 static void 958 meta_sp_list_free(sp_ext_node_t **head) 959 { 960 sp_ext_node_t *ext; 961 sp_ext_node_t *next; 962 963 assert(head != NULL); 964 965 ext = *head; 966 while (ext) { 967 next = ext->ext_next; 968 Free(ext); 969 ext = next; 970 } 971 *head = NULL; 972 } 973 974 /* 975 * FUNCTION: meta_sp_list_remove() 976 * INPUT: head - the head of the list, must be NULL for empty list 977 * ext - the extent to remove, must be a member of the list 978 * OUTPUT: head - points to the new head of the list 979 * RETURNS: void 980 * PURPOSE: unlinks the node specified by ext from the list and 981 * frees it, possibly moving the head pointer forward if 982 * the head is the node being removed. 
983 */ 984 static void 985 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 986 { 987 assert(head != NULL); 988 assert(*head != NULL); 989 990 if (*head == ext) 991 *head = ext->ext_next; 992 993 if (ext->ext_prev != NULL) 994 ext->ext_prev->ext_next = ext->ext_next; 995 if (ext->ext_next != NULL) 996 ext->ext_next->ext_prev = ext->ext_prev; 997 Free(ext); 998 } 999 1000 /* 1001 * FUNCTION: meta_sp_list_size() 1002 * INPUT: head - the head of the list, must be NULL for empty list 1003 * exttype - the type of the extents to sum 1004 * exclude_wm - subtract space for extent headers from total 1005 * OUTPUT: none 1006 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1007 * PURPOSE: sums the lengths of all extents in the list matching the 1008 * specified type. This could be used for computing the 1009 * amount of free or used space, for example. 1010 */ 1011 static sp_ext_length_t 1012 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1013 { 1014 sp_ext_node_t *ext; 1015 sp_ext_length_t size = 0LL; 1016 1017 for (ext = head; ext != NULL; ext = ext->ext_next) 1018 if (ext->ext_type == exttype) 1019 size += ext->ext_length - 1020 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1021 1022 return (size); 1023 } 1024 1025 /* 1026 * FUNCTION: meta_sp_list_find() 1027 * INPUT: head - the head of the list, must be NULL for empty list 1028 * offset - the offset contained by the node to find 1029 * OUTPUT: none 1030 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1031 * or NULL if no such nodes were found. 1032 * PURPOSE: finds a node in a list containing the requested offset 1033 * (inclusive). If multiple nodes contain this offset then 1034 * only the first will be returned, though typically these 1035 * lists are managed with non-overlapping nodes. 
1036 * 1037 * *The list MUST be sorted by offset for this function to work.* 1038 */ 1039 static sp_ext_node_t * 1040 meta_sp_list_find( 1041 sp_ext_node_t *head, 1042 sp_ext_offset_t offset 1043 ) 1044 { 1045 sp_ext_node_t *ext; 1046 1047 for (ext = head; ext != NULL; ext = ext->ext_next) { 1048 /* check if the offset lies within this extent */ 1049 if ((offset >= ext->ext_offset) && 1050 (offset < ext->ext_offset + ext->ext_length)) { 1051 /* 1052 * the requested extent should always be a 1053 * subset of an extent in the list. 1054 */ 1055 return (ext); 1056 } 1057 } 1058 return (NULL); 1059 } 1060 1061 /* 1062 * FUNCTION: meta_sp_list_freefill() 1063 * INPUT: head - the head of the list, must be NULL for empty list 1064 * size - the size of the volume this extent list is 1065 * representing 1066 * OUTPUT: head - the new head of the list 1067 * RETURNS: void 1068 * PURPOSE: finds gaps in the extent list and fills them with a free 1069 * node. If there is a gap at the beginning the head 1070 * pointer will be changed to point to the new free node. 1071 * If there is free space at the end, the last free extent 1072 * will extend all the way out to the size specified. 
1073 * 1074 * *The list MUST be sorted by offset for this function to work.* 1075 */ 1076 static void 1077 meta_sp_list_freefill( 1078 sp_ext_node_t **head, 1079 sp_ext_length_t size 1080 ) 1081 { 1082 sp_ext_node_t *ext; 1083 sp_ext_offset_t curoff = 0LL; 1084 1085 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1086 if (curoff < ext->ext_offset) 1087 meta_sp_list_insert(NULL, NULL, head, 1088 curoff, ext->ext_offset - curoff, 1089 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1090 curoff = ext->ext_offset + ext->ext_length; 1091 } 1092 1093 /* pad inverse list out to the end */ 1094 if (curoff < size) 1095 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1096 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1097 1098 if (getenv(META_SP_DEBUG)) { 1099 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1100 "holes freefilled:\n"); 1101 meta_sp_list_dump(*head); 1102 } 1103 } 1104 1105 /* 1106 * FUNCTION: meta_sp_list_dump() 1107 * INPUT: head - the head of the list, must be NULL for empty list 1108 * OUTPUT: none 1109 * RETURNS: void 1110 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1111 */ 1112 static void 1113 meta_sp_list_dump(sp_ext_node_t *head) 1114 { 1115 sp_ext_node_t *ext; 1116 1117 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1118 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1119 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1120 "Next"); 1121 for (ext = head; ext != NULL; ext = ext->ext_next) { 1122 if (ext->ext_namep != NULL) 1123 meta_sp_debug("%5s", ext->ext_namep->cname); 1124 else 1125 meta_sp_debug("%5s", "NONE"); 1126 1127 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1128 switch (ext->ext_type) { 1129 case EXTTYP_ALLOC: 1130 meta_sp_debug("%7s ", "ALLOC"); 1131 break; 1132 case EXTTYP_FREE: 1133 meta_sp_debug("%7s ", "FREE"); 1134 break; 1135 case EXTTYP_END: 1136 meta_sp_debug("%7s ", "END"); 1137 break; 1138 case EXTTYP_RESERVED: 1139 
meta_sp_debug("%7s ", "RESV"); 1140 break; 1141 default: 1142 meta_sp_debug("%7s ", "INVLD"); 1143 break; 1144 } 1145 1146 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1147 ext->ext_offset, ext->ext_length, 1148 ext->ext_flags, (void *) ext->ext_prev, 1149 (void *) ext->ext_next); 1150 } 1151 meta_sp_debug("\n"); 1152 } 1153 1154 /* 1155 * FUNCTION: meta_sp_list_overlaps() 1156 * INPUT: head - the head of the list, must be NULL for empty list 1157 * OUTPUT: none 1158 * RETURNS: int - 1 if extents overlap, 0 if ok 1159 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1160 * offset for this function to work properly. 1161 */ 1162 static int 1163 meta_sp_list_overlaps(sp_ext_node_t *head) 1164 { 1165 sp_ext_node_t *ext; 1166 1167 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1168 if (ext->ext_offset + ext->ext_length > 1169 ext->ext_next->ext_offset) 1170 return (1); 1171 } 1172 return (0); 1173 } 1174 1175 /* 1176 * ************************************************************************** 1177 * Extent Allocation Functions * 1178 * ************************************************************************** 1179 */ 1180 1181 /* 1182 * FUNCTION: meta_sp_alloc_by_ext() 1183 * INPUT: sp - the set name for the device the node belongs to 1184 * np - the name of the device the node belongs to 1185 * head - the head of the list, must be NULL for empty list 1186 * free_ext - the free extent being allocated from 1187 * alloc_offset - the offset of the allocation 1188 * alloc_len - the length of the allocation 1189 * seq - the sequence number of the allocation 1190 * OUTPUT: head - the new head pointer 1191 * RETURNS: void 1192 * PURPOSE: allocates a portion of the free extent free_ext. The 1193 * allocated portion starts at alloc_offset and is 1194 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1195 * alloc_length) must be contained within the free extent. 
1196 * 1197 * The free extent is split into as many as 3 pieces - a 1198 * free extent containing [ free_offset .. alloc_offset ), an 1199 * allocated extent containing the range [ alloc_offset .. 1200 * alloc_end ], and another free extent containing the 1201 * range ( alloc_end .. free_end ]. If either of the two 1202 * new free extents would be zero length, they are not created. 1203 * 1204 * Finally, the original free extent is removed. All newly 1205 * created extents have the EXTFLG_UPDATE flag set. 1206 */ 1207 static void 1208 meta_sp_alloc_by_ext( 1209 mdsetname_t *sp, 1210 mdname_t *np, 1211 sp_ext_node_t **head, 1212 sp_ext_node_t *free_ext, 1213 sp_ext_offset_t alloc_offset, 1214 sp_ext_length_t alloc_length, 1215 uint_t seq 1216 ) 1217 { 1218 sp_ext_offset_t free_offset = free_ext->ext_offset; 1219 sp_ext_length_t free_length = free_ext->ext_length; 1220 1221 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1222 sp_ext_offset_t free_end = free_offset + free_length; 1223 1224 /* allocated extent must be a subset of the free extent */ 1225 assert(free_offset <= alloc_offset); 1226 assert(free_end >= alloc_end); 1227 1228 meta_sp_list_remove(head, free_ext); 1229 1230 if (free_offset < alloc_offset) { 1231 meta_sp_list_insert(NULL, NULL, head, free_offset, 1232 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1233 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1234 } 1235 1236 if (free_end > alloc_end) { 1237 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1238 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1239 meta_sp_cmp_by_offset); 1240 } 1241 1242 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1243 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1244 1245 if (getenv(META_SP_DEBUG)) { 1246 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1247 meta_sp_list_dump(*head); 1248 } 1249 } 1250 1251 /* 1252 * FUNCTION: meta_sp_alloc_by_len() 1253 * INPUT: sp - the set name for the device the node belongs to 1254 * np - the 
name of the device the node belongs to 1255 * head - the head of the list, must be NULL for empty list 1256 * *lp - the requested length to allocate 1257 * last_off - the last offset already allocated. 1258 * alignment - the desired extent alignmeent 1259 * OUTPUT: head - the new head pointer 1260 * *lp - the length allocated 1261 * RETURNS: int - -1 if error, the number of new extents on success 1262 * PURPOSE: allocates extents from free space to satisfy the requested 1263 * length. If requested length is zero, allocates all 1264 * remaining free space. This function provides the meat 1265 * of the extent allocation algorithm. Allocation is a 1266 * three tier process: 1267 * 1268 * 1. If last_off is nonzero and there is free space following 1269 * that node, then it is extended to allocate as much of that 1270 * free space as possible. This is useful for metattach. 1271 * 2. If a free extent can be found to satisfy the remaining 1272 * requested space, then satisfy the rest of the request 1273 * from that extent. 1274 * 3. Start allocating space from any remaining free extents until 1275 * the remainder of the request is satisified. 1276 * 1277 * If alignment is non-zero, then every extent modified 1278 * or newly allocated will be aligned modulo alignment, 1279 * with a length that is an integer multiple of 1280 * alignment. 1281 * 1282 * The EXTFLG_UPDATE flag is set for all nodes (free and 1283 * allocated) that require updated watermarks. 1284 * 1285 * This algorithm may have a negative impact on fragmentation 1286 * in pathological cases and may be improved if it turns out 1287 * to be a problem. This may be exacerbated by particularly 1288 * large alignments. 1289 * 1290 * NOTE: It's confusing, so it demands an explanation: 1291 * - len is used to represent requested data space; it 1292 * does not include room for a watermark. 
On each full 1293 * or partial allocation, len will be decremented by 1294 * alloc_len (see next paragraph) until it reaches 1295 * zero. 1296 * - alloc_len is used to represent data space allocated 1297 * from a particular extent; it does not include space 1298 * for a watermark. In the rare event that a_length 1299 * (see next paragraph) is equal to MD_SP_WMSIZE, 1300 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1301 * fragment of space will be utterly unusable. 1302 * - a_length is used to represent all space to be 1303 * allocated from a particular extent; it DOES include 1304 * space for a watermark. 1305 */ 1306 static int 1307 meta_sp_alloc_by_len( 1308 mdsetname_t *sp, 1309 mdname_t *np, 1310 sp_ext_node_t **head, 1311 sp_ext_length_t *lp, 1312 sp_ext_offset_t last_off, 1313 sp_ext_offset_t alignment 1314 ) 1315 { 1316 sp_ext_node_t *free_ext; 1317 sp_ext_node_t *alloc_ext; 1318 uint_t last_seq = 0; 1319 uint_t numexts = 0; 1320 sp_ext_length_t freespace; 1321 sp_ext_length_t alloc_len; 1322 sp_ext_length_t len; 1323 1324 /* We're DOA if we can't read *lp */ 1325 assert(lp != NULL); 1326 len = *lp; 1327 1328 /* 1329 * Process the nominal case first: we've been given an actual 1330 * size argument, rather than the literal "all" 1331 */ 1332 1333 if (len != 0) { 1334 1335 /* 1336 * Short circuit the check for free space. This may 1337 * tell us we have enough space when we really don't 1338 * because each extent loses space to a watermark, but 1339 * it will always tell us there isn't enough space 1340 * correctly. Worst case we do some extra work. 1341 */ 1342 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1343 INCLUDE_WM); 1344 1345 if (freespace < len) 1346 return (-1); 1347 1348 /* 1349 * First see if we can extend the last extent for an 1350 * attach. 
1351 */ 1352 if (last_off != 0LL) { 1353 int align = 0; 1354 1355 alloc_ext = 1356 meta_sp_list_find(*head, last_off); 1357 assert(alloc_ext != NULL); 1358 1359 /* 1360 * The offset test reflects the 1361 * inclusion of the watermark in the extent 1362 */ 1363 align = (alignment > 0) && 1364 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1365 alignment) == 0); 1366 1367 /* 1368 * If we decided not to align here, we should 1369 * also reset "alignment" so we don't bother 1370 * later, either. 1371 */ 1372 if (!align) { 1373 alignment = 0; 1374 } 1375 1376 last_seq = alloc_ext->ext_seq; 1377 1378 free_ext = meta_sp_list_find(*head, 1379 alloc_ext->ext_offset + 1380 alloc_ext->ext_length); 1381 1382 /* 1383 * If a free extent follows our last allocated 1384 * extent, then remove the last allocated 1385 * extent and increase the size of the free 1386 * extent to overlap it, then allocate the 1387 * total space from the new free extent. 1388 */ 1389 if (free_ext != NULL && 1390 free_ext->ext_type == EXTTYP_FREE) { 1391 assert(free_ext->ext_offset == 1392 alloc_ext->ext_offset + 1393 alloc_ext->ext_length); 1394 1395 alloc_len = 1396 MIN(len, free_ext->ext_length); 1397 1398 if (align && (alloc_len < len)) { 1399 /* No watermark space needed */ 1400 alloc_len -= alloc_len % alignment; 1401 } 1402 1403 if (alloc_len > 0) { 1404 free_ext->ext_offset -= 1405 alloc_ext->ext_length; 1406 free_ext->ext_length += 1407 alloc_ext->ext_length; 1408 1409 meta_sp_alloc_by_ext(sp, np, head, 1410 free_ext, free_ext->ext_offset, 1411 alloc_ext->ext_length + alloc_len, 1412 last_seq); 1413 1414 /* 1415 * now remove the original allocated 1416 * node. We may have overlapping 1417 * extents for a short time before 1418 * this node is removed. 1419 */ 1420 meta_sp_list_remove(head, alloc_ext); 1421 len -= alloc_len; 1422 } 1423 } 1424 last_seq++; 1425 } 1426 1427 if (len == 0LL) 1428 goto out; 1429 1430 /* 1431 * Next, see if we can find a single allocation for 1432 * the remainder. 
This may make fragmentation worse 1433 * in some cases, but there's no good way to allocate 1434 * that doesn't have a highly fragmented corner case. 1435 */ 1436 for (free_ext = *head; free_ext != NULL; 1437 free_ext = free_ext->ext_next) { 1438 sp_ext_offset_t a_offset; 1439 sp_ext_offset_t a_length; 1440 1441 if (free_ext->ext_type != EXTTYP_FREE) 1442 continue; 1443 1444 /* 1445 * The length test should include space for 1446 * the watermark 1447 */ 1448 1449 a_offset = free_ext->ext_offset; 1450 a_length = free_ext->ext_length; 1451 1452 if (alignment > 0) { 1453 1454 /* 1455 * Shortcut for extents that have been 1456 * previously added to pad out the 1457 * data space 1458 */ 1459 if (a_length < alignment) { 1460 continue; 1461 } 1462 1463 /* 1464 * Round up so the data space begins 1465 * on a properly aligned boundary. 1466 */ 1467 a_offset += alignment - 1468 (a_offset % alignment) - MD_SP_WMSIZE; 1469 1470 /* 1471 * This is only necessary in case the 1472 * watermark size is ever greater than 1473 * one. It'll never happen, of 1474 * course; we'll get rid of watermarks 1475 * before we make 'em bigger. 1476 */ 1477 if (a_offset < free_ext->ext_offset) { 1478 a_offset += alignment; 1479 } 1480 1481 /* 1482 * Adjust the length to account for 1483 * the space lost above (if any) 1484 */ 1485 a_length -= 1486 (a_offset - free_ext->ext_offset); 1487 } 1488 1489 if (a_length >= len + MD_SP_WMSIZE) { 1490 meta_sp_alloc_by_ext(sp, np, head, 1491 free_ext, a_offset, 1492 len + MD_SP_WMSIZE, last_seq); 1493 1494 len = 0LL; 1495 numexts++; 1496 break; 1497 } 1498 } 1499 1500 if (len == 0LL) 1501 goto out; 1502 1503 1504 /* 1505 * If the request could not be satisfied by extending 1506 * the last extent or by a single extent, then put 1507 * multiple smaller extents together until the request 1508 * is satisfied. 
1509 */ 1510 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1511 free_ext = free_ext->ext_next) { 1512 sp_ext_offset_t a_offset; 1513 sp_ext_length_t a_length; 1514 1515 if (free_ext->ext_type != EXTTYP_FREE) 1516 continue; 1517 1518 a_offset = free_ext->ext_offset; 1519 a_length = free_ext->ext_length; 1520 1521 if (alignment > 0) { 1522 1523 /* 1524 * Shortcut for extents that have been 1525 * previously added to pad out the 1526 * data space 1527 */ 1528 if (a_length < alignment) { 1529 continue; 1530 } 1531 1532 /* 1533 * Round up so the data space begins 1534 * on a properly aligned boundary. 1535 */ 1536 a_offset += alignment - 1537 (a_offset % alignment) - MD_SP_WMSIZE; 1538 1539 /* 1540 * This is only necessary in case the 1541 * watermark size is ever greater than 1542 * one. It'll never happen, of 1543 * course; we'll get rid of watermarks 1544 * before we make 'em bigger. 1545 */ 1546 if (a_offset < free_ext->ext_offset) { 1547 a_offset += alignment; 1548 } 1549 1550 /* 1551 * Adjust the length to account for 1552 * the space lost above (if any) 1553 */ 1554 a_length -= 1555 (a_offset - free_ext->ext_offset); 1556 1557 /* 1558 * Adjust the length to be properly 1559 * aligned if it is NOT to be the 1560 * last extent in the soft partition. 1561 */ 1562 if ((a_length - MD_SP_WMSIZE) < len) 1563 a_length -= 1564 (a_length - MD_SP_WMSIZE) 1565 % alignment; 1566 } 1567 1568 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1569 if (alloc_len == 0) 1570 continue; 1571 1572 /* 1573 * meta_sp_alloc_by_ext() expects the 1574 * allocation length to include the watermark 1575 * size, which is why we don't simply pass in 1576 * alloc_len here. 1577 */ 1578 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1579 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1580 last_seq); 1581 1582 len -= alloc_len; 1583 numexts++; 1584 last_seq++; 1585 } 1586 1587 1588 /* 1589 * If there was not enough space we can throw it all 1590 * away since no real work has been done yet. 
1591 */ 1592 if (len != 0) { 1593 meta_sp_list_free(head); 1594 return (-1); 1595 } 1596 } 1597 1598 /* 1599 * Otherwise, the literal "all" was specified: allocate all 1600 * available free space. Don't bother with alignment. 1601 */ 1602 else { 1603 /* First, extend the last extent if this is a grow */ 1604 if (last_off != 0LL) { 1605 alloc_ext = 1606 meta_sp_list_find(*head, last_off); 1607 assert(alloc_ext != NULL); 1608 1609 last_seq = alloc_ext->ext_seq; 1610 1611 free_ext = meta_sp_list_find(*head, 1612 alloc_ext->ext_offset + 1613 alloc_ext->ext_length); 1614 1615 /* 1616 * If a free extent follows our last allocated 1617 * extent, then remove the last allocated 1618 * extent and increase the size of the free 1619 * extent to overlap it, then allocate the 1620 * total space from the new free extent. 1621 */ 1622 if (free_ext != NULL && 1623 free_ext->ext_type == EXTTYP_FREE) { 1624 assert(free_ext->ext_offset == 1625 alloc_ext->ext_offset + 1626 alloc_ext->ext_length); 1627 1628 len = alloc_len = 1629 free_ext->ext_length; 1630 1631 free_ext->ext_offset -= 1632 alloc_ext->ext_length; 1633 free_ext->ext_length += 1634 alloc_ext->ext_length; 1635 1636 meta_sp_alloc_by_ext(sp, np, head, 1637 free_ext, free_ext->ext_offset, 1638 alloc_ext->ext_length + alloc_len, 1639 last_seq); 1640 1641 /* 1642 * now remove the original allocated 1643 * node. We may have overlapping 1644 * extents for a short time before 1645 * this node is removed. 
1646 */ 1647 meta_sp_list_remove(head, alloc_ext); 1648 } 1649 1650 last_seq++; 1651 } 1652 1653 /* Next, grab all remaining free space */ 1654 for (free_ext = *head; free_ext != NULL; 1655 free_ext = free_ext->ext_next) { 1656 1657 if (free_ext->ext_type == EXTTYP_FREE) { 1658 alloc_len = 1659 free_ext->ext_length - MD_SP_WMSIZE; 1660 if (alloc_len == 0) 1661 continue; 1662 1663 /* 1664 * meta_sp_alloc_by_ext() expects the 1665 * allocation length to include the 1666 * watermark size, which is why we 1667 * don't simply pass in alloc_len 1668 * here. 1669 */ 1670 meta_sp_alloc_by_ext(sp, np, head, 1671 free_ext, free_ext->ext_offset, 1672 free_ext->ext_length, 1673 last_seq); 1674 1675 len += alloc_len; 1676 numexts++; 1677 last_seq++; 1678 } 1679 } 1680 } 1681 1682 out: 1683 if (getenv(META_SP_DEBUG)) { 1684 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1685 "allocation:\n"); 1686 meta_sp_list_dump(*head); 1687 } 1688 1689 if (*lp == 0) { 1690 *lp = len; 1691 1692 /* 1693 * Make sure the callers hit a no space error if we 1694 * didn't actually find anything. 1695 */ 1696 if (len == 0) { 1697 return (-1); 1698 } 1699 } 1700 1701 return (numexts); 1702 } 1703 1704 /* 1705 * FUNCTION: meta_sp_alloc_by_list() 1706 * INPUT: sp - the set name for the device the node belongs to 1707 * np - the name of the device the node belongs to 1708 * head - the head of the list, must be NULL for empty list 1709 * oblist - an extent list containing requested nodes to allocate 1710 * OUTPUT: head - the new head pointer 1711 * RETURNS: int - -1 if error, the number of new extents on success 1712 * PURPOSE: allocates extents from free space to satisfy the requested 1713 * extent list. This is primarily used for the -o/-b options 1714 * where the user may specifically request extents to allocate. 1715 * Each extent in the oblist must be a subset (inclusive) of a 1716 * free extent and may not overlap each other. 
This 1717 * function sets the EXTFLG_UPDATE flag for each node that 1718 * requires a watermark update after allocating. 1719 */ 1720 static int 1721 meta_sp_alloc_by_list( 1722 mdsetname_t *sp, 1723 mdname_t *np, 1724 sp_ext_node_t **head, 1725 sp_ext_node_t *oblist 1726 ) 1727 { 1728 sp_ext_node_t *ext; 1729 sp_ext_node_t *free_ext; 1730 uint_t numexts = 0; 1731 1732 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1733 1734 free_ext = meta_sp_list_find(*head, 1735 ext->ext_offset - MD_SP_WMSIZE); 1736 1737 /* Make sure the allocation is within the free extent */ 1738 if ((free_ext == NULL) || 1739 (ext->ext_offset + ext->ext_length > 1740 free_ext->ext_offset + free_ext->ext_length) || 1741 (free_ext->ext_type != EXTTYP_FREE)) 1742 return (-1); 1743 1744 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1745 ext->ext_offset - MD_SP_WMSIZE, 1746 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1747 1748 numexts++; 1749 } 1750 1751 assert(meta_sp_list_overlaps(*head) == 0); 1752 1753 if (getenv(META_SP_DEBUG)) { 1754 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1755 "allocation:\n"); 1756 meta_sp_list_dump(*head); 1757 } 1758 1759 return (numexts); 1760 } 1761 1762 /* 1763 * ************************************************************************** 1764 * Extent List Population Functions * 1765 * ************************************************************************** 1766 */ 1767 1768 /* 1769 * FUNCTION: meta_sp_extlist_from_namelist() 1770 * INPUT: sp - the set name for the device the node belongs to 1771 * spnplp - the namelist of soft partitions to build a list from 1772 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1773 * ep - return error pointer 1774 * RETURNS: int - -1 if error, 0 on success 1775 * PURPOSE: builds an extent list representing the soft partitions 1776 * specified in the namelist. Each extent in each soft 1777 * partition is added to the list with the type EXTTYP_ALLOC. 
1778 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1779 * extent in the list includes the space occupied by the 1780 * watermark, which is not included in the unit structures. 1781 */ 1782 static int 1783 meta_sp_extlist_from_namelist( 1784 mdsetname_t *sp, 1785 mdnamelist_t *spnlp, 1786 sp_ext_node_t **extlist, 1787 md_error_t *ep 1788 ) 1789 { 1790 int extn; 1791 md_sp_t *msp; /* unit structure of the sp's */ 1792 mdnamelist_t *namep; 1793 1794 assert(sp != NULL); 1795 1796 /* 1797 * Now go through the soft partitions and add a node to the used 1798 * list for each allocated extent. 1799 */ 1800 for (namep = spnlp; namep != NULL; namep = namep->next) { 1801 mdname_t *curnp = namep->namep; 1802 1803 /* get the unit structure */ 1804 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1805 return (-1); 1806 1807 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1808 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1809 1810 /* 1811 * subtract from offset and add to the length 1812 * to account for the watermark, which is not 1813 * contained in the extents in the unit structure. 1814 */ 1815 meta_sp_list_insert(sp, curnp, extlist, 1816 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1817 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1818 } 1819 } 1820 return (0); 1821 } 1822 1823 /* 1824 * FUNCTION: meta_sp_extlist_from_wm() 1825 * INPUT: sp - the set name for the device the node belongs to 1826 * compnp - the name of the device to scan watermarks on 1827 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1828 * ep - return error pointer 1829 * RETURNS: int - -1 if error, 0 on success 1830 * PURPOSE: builds an extent list representing the soft partitions 1831 * specified in the namelist. Each extent in each soft 1832 * partition is added to the list with the type EXTTYP_ALLOC. 1833 * The EXTFLG_UPDATE flag is not set on any nodes. 
Each 1834 * extent in the list includes the space occupied by the 1835 * watermark, which is not included in the unit structures. 1836 */ 1837 static int 1838 meta_sp_extlist_from_wm( 1839 mdsetname_t *sp, 1840 mdname_t *compnp, 1841 sp_ext_node_t **extlist, 1842 ext_cmpfunc_t compare, 1843 md_error_t *ep 1844 ) 1845 { 1846 mp_watermark_t wm; 1847 mdname_t *np = NULL; 1848 mdsetname_t *spsetp = NULL; 1849 sp_ext_offset_t cur_off; 1850 md_set_desc *sd; 1851 int init = 0; 1852 mdkey_t key; 1853 minor_t mnum; 1854 1855 if (!metaislocalset(sp)) { 1856 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1857 return (-1); 1858 } 1859 1860 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1861 return (-1); 1862 1863 for (;;) { 1864 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1865 return (-1); 1866 } 1867 1868 /* get the set and name pointers */ 1869 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1870 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1871 return (-1); 1872 } 1873 } 1874 1875 /* 1876 * For the MN set, meta_init_make_device needs to 1877 * be run on all the nodes so the entries for the 1878 * softpart device name and its comp can be created 1879 * in the same order in the replica namespace. If 1880 * we have it run on mdmn_do_iocset then the mddbs 1881 * will be out of sync between master node and slave 1882 * nodes. 
1883 */ 1884 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1885 1886 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1887 md_mn_msg_addmdname_t *send_params; 1888 int result; 1889 md_mn_result_t *resp = NULL; 1890 int message_size; 1891 1892 message_size = sizeof (*send_params) + 1893 strlen(wm.wm_mdname) + 1; 1894 send_params = Zalloc(message_size); 1895 send_params->addmdname_setno = sp->setno; 1896 (void) strcpy(&send_params->addmdname_name[0], 1897 wm.wm_mdname); 1898 result = mdmn_send_message(sp->setno, 1899 MD_MN_MSG_ADDMDNAME, 1900 MD_MSGF_PANIC_WHEN_INCONSISTENT, 1901 (char *)send_params, message_size, &resp, 1902 ep); 1903 Free(send_params); 1904 if (resp != NULL) { 1905 if (resp->mmr_exitval != 0) { 1906 free_result(resp); 1907 return (-1); 1908 } 1909 free_result(resp); 1910 } 1911 if (result != 0) 1912 return (-1); 1913 } else { 1914 1915 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1916 if ((key = meta_init_make_device(&sp, 1917 wm.wm_mdname, ep)) <= 0) { 1918 return (-1); 1919 } 1920 init = 1; 1921 } 1922 } 1923 1924 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1925 if (np == NULL) { 1926 if (init) { 1927 if (meta_getnmentbykey(sp->setno, MD_SIDEWILD, 1928 key, NULL, &mnum, NULL, ep) != NULL) { 1929 (void) metaioctl(MD_IOCREM_DEV, &mnum, 1930 ep, NULL); 1931 } 1932 (void) del_self_name(sp, key, ep); 1933 } 1934 return (-1); 1935 } 1936 } 1937 1938 /* insert watermark into extent list */ 1939 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1940 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1941 EXTFLG_UPDATE, compare); 1942 1943 /* if we see the end watermark, we're done */ 1944 if (wm.wm_type == EXTTYP_END) 1945 break; 1946 1947 cur_off += wm.wm_length + 1; 1948 1949 /* clear out set and name pointers for next iteration */ 1950 np = NULL; 1951 spsetp = NULL; 1952 } 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * ************************************************************************** 1959 * Print (metastat) Functions * 
1960 * ************************************************************************** 1961 */ 1962 1963 /* 1964 * FUNCTION: meta_sp_short_print() 1965 * INPUT: msp - the unit structure to display 1966 * fp - the file pointer to send output to 1967 * options - print options from the command line processor 1968 * OUTPUT: ep - return error pointer 1969 * RETURNS: int - -1 if error, 0 on success 1970 * PURPOSE: display a short report of the soft partition in md.tab 1971 * form, primarily used for metastat -p. 1972 */ 1973 static int 1974 meta_sp_short_print( 1975 md_sp_t *msp, 1976 char *fname, 1977 FILE *fp, 1978 mdprtopts_t options, 1979 md_error_t *ep 1980 ) 1981 { 1982 int extn; 1983 1984 if (options & PRINT_LARGEDEVICES) { 1985 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1986 return (0); 1987 } 1988 1989 if (options & PRINT_FN) { 1990 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1991 return (0); 1992 } 1993 1994 /* print name and -p */ 1995 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1996 return (mdsyserror(ep, errno, fname)); 1997 1998 /* print the component */ 1999 /* 2000 * Always print the full path name 2001 */ 2002 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2003 return (mdsyserror(ep, errno, fname)); 2004 2005 /* print out each extent */ 2006 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2007 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2008 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2009 extp->len) == EOF) 2010 return (mdsyserror(ep, errno, fname)); 2011 } 2012 2013 if (fprintf(fp, "\n") == EOF) 2014 return (mdsyserror(ep, errno, fname)); 2015 2016 /* success */ 2017 return (0); 2018 } 2019 2020 /* 2021 * FUNCTION: meta_sp_status_to_name() 2022 * INPUT: xsp_status - the status value to convert to a string 2023 * tstate - transient errored device state. 
If set the 2024 * device is Unavailable 2025 * OUTPUT: none 2026 * RETURNS: char * - a pointer to the string representing the status value 2027 * PURPOSE: return an internationalized string representing the 2028 * status value for a soft partition. The strings are 2029 * strdup'd and must be freed by the caller. 2030 */ 2031 static char * 2032 meta_sp_status_to_name( 2033 xsp_status_t xsp_status, 2034 uint_t tstate 2035 ) 2036 { 2037 char *rval = NULL; 2038 2039 /* 2040 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 2041 * value for an 'Unavailable' return. tstate can be set because of 2042 * other multi-node reasons (e.g. ABR being set) 2043 */ 2044 if (tstate & MD_INACCESSIBLE) { 2045 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 2046 } 2047 2048 switch (xsp_status) { 2049 case MD_SP_CREATEPEND: 2050 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 2051 break; 2052 case MD_SP_GROWPEND: 2053 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 2054 break; 2055 case MD_SP_DELPEND: 2056 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 2057 break; 2058 case MD_SP_OK: 2059 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 2060 break; 2061 case MD_SP_ERR: 2062 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2063 break; 2064 case MD_SP_RECOVER: 2065 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2066 break; 2067 } 2068 2069 if (rval == NULL) 2070 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2071 2072 return (rval); 2073 } 2074 2075 /* 2076 * FUNCTION: meta_sp_report() 2077 * INPUT: sp - the set name for the unit being displayed 2078 * msp - the unit structure to display 2079 * nlpp - pass back the large devs 2080 * fp - the file pointer to send output to 2081 * options - print options from the command line processor 2082 * OUTPUT: ep - return error pointer 2083 * RETURNS: int - -1 if error, 0 on success 2084 * PURPOSE: print a full report of the device specified 2085 */ 2086 static int 2087 meta_sp_report( 2088 mdsetname_t *sp, 2089 
md_sp_t *msp, 2090 mdnamelist_t **nlpp, 2091 char *fname, 2092 FILE *fp, 2093 mdprtopts_t options, 2094 md_error_t *ep 2095 ) 2096 { 2097 uint_t extn; 2098 char *status; 2099 char *devid = ""; 2100 mdname_t *didnp = NULL; 2101 ddi_devid_t dtp; 2102 int len; 2103 uint_t tstate = 0; 2104 2105 if (options & PRINT_LARGEDEVICES) { 2106 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) { 2107 return (0); 2108 } else { 2109 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2110 return (-1); 2111 } 2112 } 2113 2114 if (options & PRINT_FN) { 2115 if ((msp->common.revision & MD_FN_META_DEV) == 0) { 2116 return (0); 2117 } else { 2118 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2119 return (-1); 2120 } 2121 } 2122 2123 if (options & PRINT_HEADER) { 2124 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2125 msp->common.namep->cname) == EOF) 2126 return (mdsyserror(ep, errno, fname)); 2127 } 2128 2129 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2130 msp->compnamep->cname) == EOF) 2131 return (mdsyserror(ep, errno, fname)); 2132 2133 /* Determine if device is available before displaying status */ 2134 if (metaismeta(msp->common.namep)) { 2135 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2136 return (-1); 2137 } 2138 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2139 2140 /* print out "State" to be consistent with other metadevices */ 2141 if (tstate & MD_ABR_CAP) { 2142 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2143 " State: %s - Application Based Recovery (ABR)\n"), 2144 status) == EOF) { 2145 Free(status); 2146 return (mdsyserror(ep, errno, fname)); 2147 } 2148 } else { 2149 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2150 " State: %s\n"), status) == EOF) { 2151 Free(status); 2152 return (mdsyserror(ep, errno, fname)); 2153 } 2154 } 2155 free(status); 2156 2157 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2158 msp->common.size, 2159 meta_number_to_string(msp->common.size, 
DEV_BSIZE)) == EOF) 2160 return (mdsyserror(ep, errno, fname)); 2161 2162 /* print component details */ 2163 if (! metaismeta(msp->compnamep)) { 2164 diskaddr_t start_blk; 2165 int has_mddb; 2166 char *has_mddb_str; 2167 2168 /* print header */ 2169 /* 2170 * Building a format string on the fly that will 2171 * be used in (f)printf. This allows the length 2172 * of the ctd to vary from small to large without 2173 * looking horrible. 2174 */ 2175 len = strlen(msp->compnamep->cname); 2176 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2177 len += 2; 2178 if (fprintf(fp, 2179 "\t%-*.*s %-12.12s %-5.5s %s\n", 2180 len, len, 2181 dgettext(TEXT_DOMAIN, "Device"), 2182 dgettext(TEXT_DOMAIN, "Start Block"), 2183 dgettext(TEXT_DOMAIN, "Dbase"), 2184 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2185 return (mdsyserror(ep, errno, fname)); 2186 } 2187 2188 2189 /* get info */ 2190 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2191 MD_DISKADDR_ERROR) 2192 return (-1); 2193 2194 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2195 return (-1); 2196 2197 if (has_mddb) 2198 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2199 else 2200 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2201 2202 /* populate the key in the name_p structure */ 2203 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2204 if (didnp == NULL) { 2205 return (-1); 2206 } 2207 2208 /* determine if devid does NOT exist */ 2209 if (options & PRINT_DEVID) { 2210 if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep), 2211 didnp->key, ep)) == NULL) 2212 devid = dgettext(TEXT_DOMAIN, "No "); 2213 else { 2214 devid = dgettext(TEXT_DOMAIN, "Yes"); 2215 free(dtp); 2216 } 2217 } 2218 2219 /* print info */ 2220 /* 2221 * This allows the length 2222 * of the ctd to vary from small to large without 2223 * looking horrible. 
2224 */ 2225 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2226 len, msp->compnamep->cname, 2227 start_blk, has_mddb_str, devid) == EOF) { 2228 return (mdsyserror(ep, errno, fname)); 2229 } 2230 (void) fprintf(fp, "\n"); 2231 } 2232 2233 2234 /* print the headers */ 2235 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2236 dgettext(TEXT_DOMAIN, "Extent"), 2237 dgettext(TEXT_DOMAIN, "Start Block"), 2238 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2239 return (mdsyserror(ep, errno, fname)); 2240 2241 /* print out each extent */ 2242 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2243 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2244 2245 /* If PRINT_TIMES option is ever supported, add output here */ 2246 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2247 extn, extp->poff, extp->len) == EOF) 2248 return (mdsyserror(ep, errno, fname)); 2249 } 2250 2251 /* separate records with a newline */ 2252 (void) fprintf(fp, "\n"); 2253 return (0); 2254 } 2255 2256 /* 2257 * FUNCTION: meta_sp_print() 2258 * INPUT: sp - the set name for the unit being displayed 2259 * np - the name of the device to print 2260 * fname - ??? not used 2261 * fp - the file pointer to send output to 2262 * options - print options from the command line processor 2263 * OUTPUT: ep - return error pointer 2264 * RETURNS: int - -1 if error, 0 on success 2265 * PURPOSE: print a full report of the device specified by metastat. 2266 * This is the main entry point for printing. 
2267 */ 2268 int 2269 meta_sp_print( 2270 mdsetname_t *sp, 2271 mdname_t *np, 2272 mdnamelist_t **nlpp, 2273 char *fname, 2274 FILE *fp, 2275 mdprtopts_t options, 2276 md_error_t *ep 2277 ) 2278 { 2279 md_sp_t *msp; 2280 md_unit_t *mdp; 2281 int rval = 0; 2282 2283 /* should always have the same set */ 2284 assert(sp != NULL); 2285 2286 /* print all the soft partitions */ 2287 if (np == NULL) { 2288 mdnamelist_t *nlp = NULL; 2289 mdnamelist_t *p; 2290 int cnt; 2291 2292 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2293 return (-1); 2294 else if (cnt == 0) 2295 return (0); 2296 2297 /* recusively print them out */ 2298 for (p = nlp; (p != NULL); p = p->next) { 2299 mdname_t *curnp = p->namep; 2300 2301 /* 2302 * one problem with the rval of -1 here is that 2303 * the error gets "lost" when the next device is 2304 * printed, but we want to print them all anyway. 2305 */ 2306 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2307 options, ep); 2308 } 2309 2310 /* clean up, return success */ 2311 metafreenamelist(nlp); 2312 return (rval); 2313 } 2314 2315 /* get the unit structure */ 2316 if ((msp = meta_get_sp_common(sp, np, 2317 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2318 return (-1); 2319 2320 /* check for parented */ 2321 if ((! (options & PRINT_SUBDEVS)) && 2322 (MD_HAS_PARENT(msp->common.parent))) { 2323 return (0); 2324 } 2325 2326 /* print appropriate detail */ 2327 if (options & PRINT_SHORT) { 2328 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2329 return (-1); 2330 } else { 2331 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2332 return (-1); 2333 } 2334 2335 /* 2336 * Print underlying metadevices if they are parented to us and 2337 * if the info for the underlying metadevice has not been printed. 
2338 */ 2339 if (metaismeta(msp->compnamep)) { 2340 /* get the unit structure for the subdevice */ 2341 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2342 return (-1); 2343 2344 /* If info not already printed, recurse */ 2345 if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) { 2346 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2347 (options | PRINT_HEADER | PRINT_SUBDEVS), 2348 NULL, ep) != 0) { 2349 return (-1); 2350 } 2351 BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp))); 2352 } 2353 } 2354 return (0); 2355 } 2356 2357 /* 2358 * ************************************************************************** 2359 * Watermark Manipulation Functions * 2360 * ************************************************************************** 2361 */ 2362 2363 /* 2364 * FUNCTION: meta_sp_get_start() 2365 * INPUT: sp - the operating set 2366 * np - device upon which the sp is being built 2367 * OUTPUT: ep - return error pointer 2368 * RETURNS: daddr_t - -1 if error, otherwise the start block 2369 * PURPOSE: Encapsulate the determination of the start block of the 2370 * device upon which the sp is built or being built. 2371 */ 2372 static diskaddr_t 2373 meta_sp_get_start( 2374 mdsetname_t *sp, 2375 mdname_t *np, 2376 md_error_t *ep 2377 ) 2378 { 2379 daddr_t start_block; 2380 2381 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) 2382 start_block += MD_SP_START; 2383 2384 return (start_block); 2385 } 2386 2387 /* 2388 * FUNCTION: meta_sp_update_wm() 2389 * INPUT: sp - the operating set 2390 * msp - a pointer to the XDR unit structure 2391 * extlist - the extent list specifying watermarks to update 2392 * OUTPUT: ep - return error pointer 2393 * RETURNS: int - -1 if error, 0 on success 2394 * PURPOSE: steps backwards through the extent list updating 2395 * watermarks for all extents with the EXTFLG_UPDATE flag 2396 * set. 
Writing the watermarks guarantees consistency when
 *		extents must be broken into pieces since the original
 *		watermark will be the last to be updated, and will be
 *		changed to point to a new watermark that is already
 *		known to be consistent.  If one of the writes fails, the
 *		original watermark stays intact and none of the changes
 *		are realized.
 */
static int
meta_sp_update_wm(
	mdsetname_t	*sp,
	md_sp_t		*msp,
	sp_ext_node_t	*extlist,
	md_error_t	*ep
)
{
	sp_ext_node_t		*ext;
	sp_ext_node_t		*tail = NULL;	/* stays NULL for empty list */
	mp_watermark_t		*wmp, *watermarks;
	xsp_offset_t		*osp, *offsets;
	int			update_count = 0;
	int			rval = 0;
	md_unit_t		*mdp;
	md_sp_update_wm_t	update_params;

	if (getenv(META_SP_DEBUG)) {
		meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
		meta_sp_list_dump(extlist);
	}

	/*
	 * find the last node so we can write the watermarks backwards
	 * and count watermarks to update so we can allocate space
	 */
	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
			update_count++;
		}

		if (ext->ext_next == NULL) {
			tail = ext;
		}
	}
	ext = tail;

	/* size each array from its own element type */
	wmp = watermarks =
	    Zalloc(update_count * sizeof (*watermarks));
	osp = offsets =
	    Zalloc(update_count * sizeof (*offsets));

	while (ext != NULL) {
		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
			/* update watermark */
			wmp->wm_magic = MD_SP_MAGIC;
			wmp->wm_version = MD_SP_VERSION;
			wmp->wm_type = ext->ext_type;
			wmp->wm_seq = ext->ext_seq;
			wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;

			/* fill in the volume name and set name */
			if (ext->ext_namep != NULL)
				(void) strcpy(wmp->wm_mdname,
				    ext->ext_namep->cname);
			else
				(void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
			if (ext->ext_setp != NULL &&
			    ext->ext_setp->setno != MD_LOCAL_SET)
				(void) strcpy(wmp->wm_setname,
				    ext->ext_setp->setname);
			else
				(void) strcpy(wmp->wm_setname,
				    MD_SP_LOCALSETNAME);

			/* Generate the checksum */
			wmp->wm_checksum = 0;
			crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
			    sizeof (*wmp), NULL);

			/* record the extent offset */
			*osp = ext->ext_offset;

			/* Advance the placeholders */
			osp++; wmp++;
		}
		ext = ext->ext_prev;
	}

	mdp = meta_get_mdunit(sp, msp->common.namep, ep);
	if (mdp == NULL) {
		rval = -1;
		goto out;
	}

	/* hand both arrays to the driver in a single ioctl */
	(void) memset(&update_params, 0, sizeof (update_params));
	update_params.mnum = MD_SID(mdp);
	update_params.count = update_count;
	update_params.wmp = (uintptr_t)watermarks;
	update_params.osp = (uintptr_t)offsets;
	MD_SETDRIVERNAME(&update_params, MD_SP,
	    MD_MIN2SET(update_params.mnum));

	if (metaioctl(MD_IOC_SPUPDATEWM, &update_params,
	    &update_params.mde, msp->common.namep->cname) != 0) {
		(void) mdstealerror(ep, &update_params.mde);
		rval = -1;
		goto out;
	}

out:
	Free(watermarks);
	Free(offsets);

	return (rval);
}

/*
 * FUNCTION:	meta_sp_clear_wm()
 * INPUT:	sp	- the operating set
 *		msp	- the unit structure for the soft partition to clear
 * OUTPUT:	ep	- return error pointer
 * RETURNS:	int	- -1 if error, 0 on success
 * PURPOSE:	steps through the extents for a soft partition unit and
 *		creates an extent list designed to mark all of the
 *		watermarks for those extents as free.  The extent list
 *		is then passed to meta_sp_update_wm() to actually write
 *		the watermarks out.
2522 */ 2523 static int 2524 meta_sp_clear_wm( 2525 mdsetname_t *sp, 2526 md_sp_t *msp, 2527 md_error_t *ep 2528 ) 2529 { 2530 sp_ext_node_t *extlist = NULL; 2531 int numexts = msp->ext.ext_len; 2532 uint_t i; 2533 int rval = 0; 2534 2535 /* for each watermark must set the flag to SP_FREE */ 2536 for (i = 0; i < numexts; i++) { 2537 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2538 2539 meta_sp_list_insert(NULL, NULL, &extlist, 2540 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2541 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2542 } 2543 2544 /* update watermarks */ 2545 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2546 2547 meta_sp_list_free(&extlist); 2548 return (rval); 2549 } 2550 2551 /* 2552 * FUNCTION: meta_sp_read_wm() 2553 * INPUT: sp - setname for component 2554 * compnp - mdname_t for component 2555 * offset - the offset of the watermark to read (sectors) 2556 * OUTPUT: wm - the watermark structure to read into 2557 * ep - return error pointer 2558 * RETURNS: int - -1 if error, 0 on success 2559 * PURPOSE: seeks out to the requested offset and reads a watermark. 2560 * It then verifies that the magic number is correct and 2561 * that the checksum is valid, returning an error if either 2562 * is wrong. 2563 */ 2564 static int 2565 meta_sp_read_wm( 2566 mdsetname_t *sp, 2567 mdname_t *compnp, 2568 mp_watermark_t *wm, 2569 sp_ext_offset_t offset, 2570 md_error_t *ep 2571 ) 2572 { 2573 md_sp_read_wm_t read_params; 2574 2575 /* 2576 * make sure block offset does not overflow 2^64 bytes and it's a 2577 * multiple of the block size. 
2578 */ 2579 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2580 /* LINTED */ 2581 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2582 2583 (void) memset(wm, 0, sizeof (*wm)); 2584 2585 (void) memset(&read_params, 0, sizeof (read_params)); 2586 read_params.rdev = compnp->dev; 2587 read_params.wmp = (uintptr_t)wm; 2588 read_params.offset = offset; 2589 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2590 2591 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2592 &read_params.mde, compnp->cname) != 0) { 2593 2594 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2595 "Extent header read failed, block %llu.\n"), offset); 2596 return (mdstealerror(ep, &read_params.mde)); 2597 } 2598 2599 /* make sure magic number is correct */ 2600 if (wm->wm_magic != MD_SP_MAGIC) { 2601 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2602 "found incorrect magic number %x, expected %x.\n"), 2603 wm->wm_magic, MD_SP_MAGIC); 2604 /* 2605 * Pass NULL for the device name as we don't have 2606 * valid watermark contents. 2607 */ 2608 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2609 } 2610 2611 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2612 sizeof (*wm), NULL)) { 2613 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2614 "found incorrect checksum %x.\n"), 2615 wm->wm_checksum); 2616 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2617 } 2618 2619 return (0); 2620 } 2621 2622 /* 2623 * ************************************************************************** 2624 * Query Functions 2625 * ************************************************************************** 2626 */ 2627 2628 /* 2629 * IMPORTANT NOTE: This is a static function that assumes that 2630 * its input parameters have been checked and 2631 * have valid values that lie within acceptable 2632 * ranges. 
2633 * 2634 * FUNCTION: meta_sp_enough_space() 2635 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2636 * must be > 0 2637 * desired_sp_size - the desired soft partition size in blocks; 2638 * must be > 0 2639 * extent_listpp - a reference to a reference to an extent 2640 * list that lists the extents on a device; 2641 * must be a reference to a reference to a 2642 * valid extent list 2643 * alignment - the desired data space alignment for the sp's 2644 * OUTPUT: boolean_t return value 2645 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2646 * list to create the desired soft partitions, 2647 * B_FALSE if there's not enough space 2648 * PURPOSE: determines whether there's enough free space in an extent 2649 * list to allow creation of a set of soft partitions 2650 */ 2651 static boolean_t 2652 meta_sp_enough_space( 2653 int desired_number_of_sps, 2654 blkcnt_t desired_sp_size, 2655 sp_ext_node_t **extent_listpp, 2656 sp_ext_length_t alignment 2657 ) 2658 { 2659 boolean_t enough_space; 2660 int number_of_sps; 2661 int number_of_extents_used; 2662 sp_ext_length_t desired_ext_length = desired_sp_size; 2663 2664 enough_space = B_TRUE; 2665 number_of_sps = 0; 2666 while ((enough_space == B_TRUE) && 2667 (number_of_sps < desired_number_of_sps)) { 2668 /* 2669 * Use the extent allocation algorithm implemented by 2670 * meta_sp_alloc_by_len() to test whether the free 2671 * extents in the extent list referenced by *extent_listpp 2672 * contain enough space to accomodate a soft partition 2673 * of size desired_ext_length. 2674 * 2675 * Repeat the test <desired_number_of_sps> times 2676 * or until it fails, whichever comes first, 2677 * each time allocating the extents required to 2678 * create the soft partition without actually 2679 * creating the soft partition. 
2680 */ 2681 number_of_extents_used = meta_sp_alloc_by_len( 2682 TEST_SETNAMEP, 2683 TEST_SOFT_PARTITION_NAMEP, 2684 extent_listpp, 2685 &desired_ext_length, 2686 NO_OFFSET, 2687 alignment); 2688 if (number_of_extents_used == -1) { 2689 enough_space = B_FALSE; 2690 } else { 2691 number_of_sps++; 2692 } 2693 } 2694 return (enough_space); 2695 } 2696 2697 /* 2698 * IMPORTANT NOTE: This is a static function that calls other functions 2699 * that check its mdsetnamep and device_mdnamep 2700 * input parameters, but expects extent_listpp to 2701 * be a initialized to a valid address to which 2702 * it can write a reference to the extent list that 2703 * it creates. 2704 * 2705 * FUNCTION: meta_sp_get_extent_list() 2706 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2707 * for the set containing the device for 2708 * which the extents are to be listed 2709 * device_mdnamep - a reference to the mdname_t structure 2710 * for the device for which the extents 2711 * are to be listed 2712 * OUTPUT: *extent_listpp - a reference to the extent list for 2713 * the device; NULL if the function fails 2714 * *ep - the libmeta error encountered, if any 2715 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2716 * B_FALSE if not 2717 * PURPOSE: gets the extent list for a device 2718 */ 2719 static boolean_t 2720 meta_sp_get_extent_list( 2721 mdsetname_t *mdsetnamep, 2722 mdname_t *device_mdnamep, 2723 sp_ext_node_t **extent_listpp, 2724 md_error_t *ep 2725 ) 2726 { 2727 diskaddr_t device_size_in_blocks; 2728 mdnamelist_t *sp_name_listp; 2729 diskaddr_t start_block_address_in_blocks; 2730 2731 *extent_listpp = NULL; 2732 sp_name_listp = NULL; 2733 2734 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2735 device_mdnamep, 2736 ep); 2737 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2738 if (getenv(META_SP_DEBUG)) { 2739 mde_perror(ep, "meta_sp_get_extent_list:meta_sp_get_start"); 2740 } 2741 return (B_FALSE); 2742 } 2743 2744 
device_size_in_blocks = metagetsize(device_mdnamep, ep); 2745 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2746 if (getenv(META_SP_DEBUG)) { 2747 mde_perror(ep, 2748 "meta_sp_get_extent_list:metagetsize"); 2749 } 2750 return (B_FALSE); 2751 } 2752 2753 /* 2754 * Sanity check: the start block will have skipped an integer 2755 * number of cylinders, C. C will usually be zero. If (C > 0), 2756 * and the disk slice happens to only be C cylinders in total 2757 * size, we'll fail this check. 2758 */ 2759 if (device_size_in_blocks <= 2760 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2761 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2762 return (B_FALSE); 2763 } 2764 2765 /* 2766 * After this point, we will have allocated resources, so any 2767 * failure returns must be through the supplied "fail" label 2768 * to properly deallocate things. 2769 */ 2770 2771 /* 2772 * Create an empty extent list that starts one watermark past 2773 * the start block of the device and ends one watermark before 2774 * the end of the device. 2775 */ 2776 meta_sp_list_insert(TEST_SETNAMEP, 2777 TEST_SOFT_PARTITION_NAMEP, 2778 extent_listpp, 2779 NO_OFFSET, 2780 (sp_ext_length_t)start_block_address_in_blocks, 2781 EXTTYP_RESERVED, 2782 NO_SEQUENCE_NUMBER, 2783 NO_FLAGS, 2784 meta_sp_cmp_by_offset); 2785 meta_sp_list_insert(TEST_SETNAMEP, 2786 TEST_SOFT_PARTITION_NAMEP, 2787 extent_listpp, 2788 (sp_ext_offset_t)(device_size_in_blocks - 2789 MD_SP_WMSIZE), 2790 MD_SP_WMSIZE, 2791 EXTTYP_END, 2792 NO_SEQUENCE_NUMBER, 2793 NO_FLAGS, 2794 meta_sp_cmp_by_offset); 2795 2796 /* 2797 * Get the list of soft partitions that are already on the 2798 * device. 
2799 */ 2800 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2801 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2802 if (getenv(META_SP_DEBUG)) { 2803 mde_perror(ep, 2804 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2805 } 2806 goto fail; 2807 } 2808 2809 if (sp_name_listp != NULL) { 2810 /* 2811 * If there are soft partitions on the device, add the 2812 * extents used in them to the extent list. 2813 */ 2814 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2815 extent_listpp, ep) == -1) { 2816 if (getenv(META_SP_DEBUG)) { 2817 mde_perror(ep, "meta_sp_get_extent_list:" 2818 "meta_sp_extlist_from_namelist"); 2819 } 2820 goto fail; 2821 } 2822 metafreenamelist(sp_name_listp); 2823 } 2824 2825 /* 2826 * Add free extents to the extent list to represent 2827 * the remaining regions of free space on the 2828 * device. 2829 */ 2830 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2831 return (B_TRUE); 2832 2833 fail: 2834 if (sp_name_listp != NULL) { 2835 metafreenamelist(sp_name_listp); 2836 } 2837 2838 if (*extent_listpp != NULL) { 2839 /* 2840 * meta_sp_list_free sets *extent_listpp to NULL. 2841 */ 2842 meta_sp_list_free(extent_listpp); 2843 } 2844 return (B_FALSE); 2845 } 2846 2847 /* 2848 * IMPORTANT NOTE: This is a static function that calls other functions 2849 * that check its mdsetnamep and mddrivenamep 2850 * input parameters, but expects extent_listpp to 2851 * be a initialized to a valid address to which 2852 * it can write a reference to the extent list that 2853 * it creates. 
2854 * 2855 * FUNCTION: meta_sp_get_extent_list_for_drive() 2856 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2857 * for the set containing the drive for 2858 * which the extents are to be listed 2859 * mddrivenamep - a reference to the mddrivename_t structure 2860 * for the drive for which the extents 2861 * are to be listed 2862 * OUTPUT: *extent_listpp - a reference to the extent list for 2863 * the drive; NULL if the function fails 2864 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2865 * B_FALSE if not 2866 * PURPOSE: gets the extent list for a drive when the entire drive 2867 * is to be soft partitioned 2868 */ 2869 static boolean_t 2870 meta_sp_get_extent_list_for_drive( 2871 mdsetname_t *mdsetnamep, 2872 mddrivename_t *mddrivenamep, 2873 sp_ext_node_t **extent_listpp 2874 ) 2875 { 2876 boolean_t can_use; 2877 diskaddr_t free_space; 2878 md_error_t mderror; 2879 mdvtoc_t proposed_vtoc; 2880 int repartition_options; 2881 int return_value; 2882 md_sp_t test_sp_struct; 2883 2884 can_use = B_TRUE; 2885 *extent_listpp = NULL; 2886 mderror = mdnullerror; 2887 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2888 &mderror); 2889 if (test_sp_struct.compnamep == NULL) { 2890 can_use = B_FALSE; 2891 } 2892 2893 if (can_use == B_TRUE) { 2894 mderror = mdnullerror; 2895 repartition_options = 0; 2896 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2897 MDCMD_USE_WHOLE_DISK, &repartition_options, 2898 &mderror); 2899 if (return_value != 0) { 2900 can_use = B_FALSE; 2901 } 2902 } 2903 2904 if (can_use == B_TRUE) { 2905 mderror = mdnullerror; 2906 repartition_options = repartition_options | 2907 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2908 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2909 repartition_options, &proposed_vtoc, &mderror); 2910 if (return_value != 0) { 2911 can_use = B_FALSE; 2912 } 2913 } 2914 2915 if (can_use == B_TRUE) { 2916 free_space = 
proposed_vtoc.parts[MD_SLICE0].size; 2917 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2918 can_use = B_FALSE; 2919 } 2920 } 2921 2922 if (can_use == B_TRUE) { 2923 /* 2924 * Create an extent list that starts with 2925 * a reserved extent that ends at the start 2926 * of the usable space on slice zero of the 2927 * proposed VTOC, ends with an extent that 2928 * reserves space for a watermark at the end 2929 * of slice zero, and contains a single free 2930 * extent that occupies the rest of the space 2931 * on the slice. 2932 * 2933 * NOTE: 2934 * 2935 * Don't use metagetstart() or metagetsize() to 2936 * find the usable space. They query the mdname_t 2937 * structure that represents an actual device to 2938 * determine the amount of space on the device that 2939 * contains metadata and the total amount of space 2940 * on the device. Since this function creates a 2941 * proposed extent list that doesn't reflect the 2942 * state of an actual device, there's no mdname_t 2943 * structure to be queried. 2944 * 2945 * When a drive is reformatted to prepare for 2946 * soft partitioning, all of slice seven is 2947 * reserved for metadata, all of slice zero is 2948 * available for soft partitioning, and all other 2949 * slices on the drive are empty. The proposed 2950 * extent list for the drive therefore contains 2951 * only three extents: a reserved extent that ends 2952 * at the start of the usable space on slice zero, 2953 * a single free extent that occupies all the usable 2954 * space on slice zero, and an ending extent that 2955 * reserves space for a watermark at the end of 2956 * slice zero. 
2957 */ 2958 meta_sp_list_insert(TEST_SETNAMEP, 2959 TEST_SOFT_PARTITION_NAMEP, 2960 extent_listpp, 2961 NO_OFFSET, 2962 (sp_ext_length_t)(MD_SP_START), 2963 EXTTYP_RESERVED, 2964 NO_SEQUENCE_NUMBER, 2965 NO_FLAGS, 2966 meta_sp_cmp_by_offset); 2967 meta_sp_list_insert(TEST_SETNAMEP, 2968 TEST_SOFT_PARTITION_NAMEP, 2969 extent_listpp, 2970 (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2971 MD_SP_WMSIZE, 2972 EXTTYP_END, 2973 NO_SEQUENCE_NUMBER, 2974 NO_FLAGS, 2975 meta_sp_cmp_by_offset); 2976 meta_sp_list_freefill(extent_listpp, free_space); 2977 } 2978 return (can_use); 2979 } 2980 2981 /* 2982 * FUNCTION: meta_sp_can_create_sps() 2983 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2984 * for the set containing the device for 2985 * which the extents are to be listed 2986 * mdnamep - a reference to the mdname_t of the device 2987 * on which the soft parititions are to be created 2988 * number_of_sps - the desired number of soft partitions 2989 * sp_size - the desired soft partition size 2990 * OUTPUT: boolean_t return value 2991 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 2992 * B_FALSE if not 2993 * PURPOSE: determines whether a set of soft partitions can be created 2994 * on a device 2995 */ 2996 boolean_t 2997 meta_sp_can_create_sps( 2998 mdsetname_t *mdsetnamep, 2999 mdname_t *mdnamep, 3000 int number_of_sps, 3001 blkcnt_t sp_size 3002 ) 3003 { 3004 sp_ext_node_t *extent_listp; 3005 boolean_t succeeded; 3006 md_error_t mde; 3007 3008 if ((number_of_sps > 0) && (sp_size > 0)) { 3009 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3010 &extent_listp, &mde); 3011 } else { 3012 succeeded = B_FALSE; 3013 } 3014 3015 /* 3016 * We don't really care about an error return from the 3017 * alignment call; that will just result in passing zero, 3018 * which will be interpreted as no alignment. 
3019 */ 3020 3021 if (succeeded == B_TRUE) { 3022 succeeded = meta_sp_enough_space(number_of_sps, 3023 sp_size, &extent_listp, 3024 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3025 meta_sp_list_free(&extent_listp); 3026 } 3027 return (succeeded); 3028 } 3029 3030 /* 3031 * FUNCTION: meta_sp_can_create_sps_on_drive() 3032 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3033 * for the set containing the drive for 3034 * which the extents are to be listed 3035 * mddrivenamep - a reference to the mddrivename_t of the drive 3036 * on which the soft parititions are to be created 3037 * number_of_sps - the desired number of soft partitions 3038 * sp_size - the desired soft partition size 3039 * OUTPUT: boolean_t return value 3040 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3041 * B_FALSE if not 3042 * PURPOSE: determines whether a set of soft partitions can be created 3043 * on a drive if the entire drive is soft partitioned 3044 */ 3045 boolean_t 3046 meta_sp_can_create_sps_on_drive( 3047 mdsetname_t *mdsetnamep, 3048 mddrivename_t *mddrivenamep, 3049 int number_of_sps, 3050 blkcnt_t sp_size 3051 ) 3052 { 3053 sp_ext_node_t *extent_listp; 3054 boolean_t succeeded; 3055 3056 if ((number_of_sps > 0) && (sp_size > 0)) { 3057 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3058 mddrivenamep, 3059 &extent_listp); 3060 } else { 3061 succeeded = B_FALSE; 3062 } 3063 3064 /* 3065 * We don't care about alignment on the space call because 3066 * we're specifically dealing with a drive, which will have no 3067 * inherent alignment. 
3068 */ 3069 3070 if (succeeded == B_TRUE) { 3071 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3072 &extent_listp, SP_UNALIGNED); 3073 meta_sp_list_free(&extent_listp); 3074 } 3075 return (succeeded); 3076 } 3077 3078 /* 3079 * FUNCTION: meta_sp_get_free_space() 3080 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3081 * for the set containing the device for 3082 * which the free space is to be returned 3083 * mdnamep - a reference to the mdname_t of the device 3084 * for which the free space is to be returned 3085 * OUTPUT: blkcnt_t return value 3086 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3087 * PURPOSE: returns the number of blocks of free space on a device 3088 */ 3089 blkcnt_t 3090 meta_sp_get_free_space( 3091 mdsetname_t *mdsetnamep, 3092 mdname_t *mdnamep 3093 ) 3094 { 3095 sp_ext_node_t *extent_listp; 3096 sp_ext_length_t free_blocks; 3097 boolean_t succeeded; 3098 md_error_t mde; 3099 3100 extent_listp = NULL; 3101 free_blocks = 0; 3102 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3103 &extent_listp, &mde); 3104 if (succeeded == B_TRUE) { 3105 free_blocks = meta_sp_list_size(extent_listp, 3106 EXTTYP_FREE, INCLUDE_WM); 3107 meta_sp_list_free(&extent_listp); 3108 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3109 /* 3110 * Subtract a safety margin for watermarks when 3111 * computing the number of blocks available for 3112 * use. The actual number of watermarks can't 3113 * be calculated without knowing the exact numbers 3114 * and sizes of both the free extents and the soft 3115 * partitions to be created. The calculation is 3116 * highly complex and error-prone even if those 3117 * quantities are known. The approximate value 3118 * 10 * MD_SP_WMSIZE is within a few blocks of the 3119 * correct value in all practical cases. 
3120 */ 3121 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3122 } else { 3123 free_blocks = 0; 3124 } 3125 } else { 3126 mdclrerror(&mde); 3127 } 3128 3129 return (free_blocks); 3130 } 3131 3132 /* 3133 * FUNCTION: meta_sp_get_free_space_on_drive() 3134 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3135 * for the set containing the drive for 3136 * which the free space is to be returned 3137 * mddrivenamep - a reference to the mddrivename_t of the drive 3138 * for which the free space is to be returned 3139 * OUTPUT: blkcnt_t return value 3140 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3141 * PURPOSE: returns the number of blocks of space usable for soft 3142 * partitions on an entire drive, if the entire drive is 3143 * soft partitioned 3144 */ 3145 blkcnt_t 3146 meta_sp_get_free_space_on_drive( 3147 mdsetname_t *mdsetnamep, 3148 mddrivename_t *mddrivenamep 3149 ) 3150 { 3151 sp_ext_node_t *extent_listp; 3152 sp_ext_length_t free_blocks; 3153 boolean_t succeeded; 3154 3155 extent_listp = NULL; 3156 free_blocks = 0; 3157 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3158 mddrivenamep, &extent_listp); 3159 if (succeeded == B_TRUE) { 3160 free_blocks = meta_sp_list_size(extent_listp, 3161 EXTTYP_FREE, INCLUDE_WM); 3162 meta_sp_list_free(&extent_listp); 3163 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3164 /* 3165 * Subtract a safety margin for watermarks when 3166 * computing the number of blocks available for 3167 * use. The actual number of watermarks can't 3168 * be calculated without knowing the exact numbers 3169 * and sizes of both the free extents and the soft 3170 * partitions to be created. The calculation is 3171 * highly complex and error-prone even if those 3172 * quantities are known. The approximate value 3173 * 10 * MD_SP_WMSIZE is within a few blocks of the 3174 * correct value in all practical cases. 
3175 */ 3176 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3177 } else { 3178 free_blocks = 0; 3179 } 3180 } 3181 return (free_blocks); 3182 } 3183 3184 /* 3185 * FUNCTION: meta_sp_get_number_of_possible_sps() 3186 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3187 * for the set containing the device for 3188 * which the number of possible soft partitions 3189 * is to be returned 3190 * mdnamep - a reference to the mdname_t of the device 3191 * for which the number of possible soft partitions 3192 * is to be returned 3193 * OUTPUT: int return value 3194 * RETURNS: int - the number of soft partitions of the desired size 3195 * that can be created on the device 3196 * PURPOSE: returns the number of soft partitions of a given size 3197 * that can be created on a device 3198 */ 3199 int 3200 meta_sp_get_number_of_possible_sps( 3201 mdsetname_t *mdsetnamep, 3202 mdname_t *mdnamep, 3203 blkcnt_t sp_size 3204 ) 3205 { 3206 sp_ext_node_t *extent_listp; 3207 int number_of_possible_sps; 3208 boolean_t succeeded; 3209 md_error_t mde; 3210 sp_ext_length_t alignment; 3211 3212 extent_listp = NULL; 3213 number_of_possible_sps = 0; 3214 if (sp_size > 0) { 3215 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3216 mdnamep, &extent_listp, &mde)) == B_FALSE) 3217 mdclrerror(&mde); 3218 } else { 3219 succeeded = B_FALSE; 3220 } 3221 3222 if (succeeded == B_TRUE) { 3223 alignment = meta_sp_get_default_alignment(mdsetnamep, 3224 mdnamep, &mde); 3225 } 3226 3227 while (succeeded == B_TRUE) { 3228 /* 3229 * Keep allocating space from the extent list 3230 * for soft partitions of the desired size until 3231 * there's not enough free space left in the list 3232 * for another soft partiition of that size. 3233 * Add one to the number of possible soft partitions 3234 * for each soft partition for which there is 3235 * enough free space left. 
3236 */ 3237 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3238 sp_size, &extent_listp, alignment); 3239 if (succeeded == B_TRUE) { 3240 number_of_possible_sps++; 3241 } 3242 } 3243 if (extent_listp != NULL) { 3244 meta_sp_list_free(&extent_listp); 3245 } 3246 return (number_of_possible_sps); 3247 } 3248 3249 /* 3250 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3251 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3252 * for the set containing the drive for 3253 * which the number of possible soft partitions 3254 * is to be returned 3255 * mddrivenamep - a reference to the mddrivename_t of the drive 3256 * for which the number of possible soft partitions 3257 * is to be returned 3258 * sp_size - the size in blocks of the proposed soft partitions 3259 * OUTPUT: int return value 3260 * RETURNS: int - the number of soft partitions of the desired size 3261 * that can be created on the drive 3262 * PURPOSE: returns the number of soft partitions of a given size 3263 * that can be created on a drive, if the entire drive is 3264 * soft partitioned 3265 */ 3266 int 3267 meta_sp_get_number_of_possible_sps_on_drive( 3268 mdsetname_t *mdsetnamep, 3269 mddrivename_t *mddrivenamep, 3270 blkcnt_t sp_size 3271 ) 3272 { 3273 sp_ext_node_t *extent_listp; 3274 int number_of_possible_sps; 3275 boolean_t succeeded; 3276 3277 extent_listp = NULL; 3278 number_of_possible_sps = 0; 3279 if (sp_size > 0) { 3280 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3281 mddrivenamep, &extent_listp); 3282 } else { 3283 succeeded = B_FALSE; 3284 } 3285 while (succeeded == B_TRUE) { 3286 /* 3287 * Keep allocating space from the extent list 3288 * for soft partitions of the desired size until 3289 * there's not enough free space left in the list 3290 * for another soft partition of that size. 3291 * Add one to the number of possible soft partitions 3292 * for each soft partition for which there is 3293 * enough free space left. 
3294 * 3295 * Since it's a drive, not a metadevice, make no 3296 * assumptions about alignment. 3297 */ 3298 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3299 sp_size, &extent_listp, SP_UNALIGNED); 3300 if (succeeded == B_TRUE) { 3301 number_of_possible_sps++; 3302 } 3303 } 3304 if (extent_listp != NULL) { 3305 meta_sp_list_free(&extent_listp); 3306 } 3307 return (number_of_possible_sps); 3308 } 3309 3310 /* 3311 * FUNCTION: meta_sp_get_possible_sp_size() 3312 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3313 * for the set containing the device for 3314 * which the possible soft partition size 3315 * is to be returned 3316 * mdnamep - a reference to the mdname_t of the device 3317 * for which the possible soft partition size 3318 * is to be returned 3319 * number_of_sps - the desired number of soft partitions 3320 * OUTPUT: blkcnt_t return value 3321 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3322 * PURPOSE: returns the maximum possible size of each of a given number of 3323 * soft partitions of equal size that can be created on a device 3324 */ 3325 blkcnt_t 3326 meta_sp_get_possible_sp_size( 3327 mdsetname_t *mdsetnamep, 3328 mdname_t *mdnamep, 3329 int number_of_sps 3330 ) 3331 { 3332 blkcnt_t free_blocks; 3333 blkcnt_t sp_size; 3334 boolean_t succeeded; 3335 3336 sp_size = 0; 3337 if (number_of_sps > 0) { 3338 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3339 sp_size = free_blocks / number_of_sps; 3340 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3341 number_of_sps, sp_size); 3342 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3343 /* 3344 * To compensate for space that may have been 3345 * occupied by watermarks, reduce sp_size by a 3346 * number of blocks equal to the number of soft 3347 * partitions desired, and test again to see 3348 * whether the desired number of soft partitions 3349 * can be created. 
3350 */ 3351 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3352 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3353 number_of_sps, sp_size); 3354 } 3355 if (sp_size < 0) { 3356 sp_size = 0; 3357 } 3358 } 3359 return (sp_size); 3360 } 3361 3362 /* 3363 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3364 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3365 * for the set containing the drive for 3366 * which the possible soft partition size 3367 * is to be returned 3368 * mddrivenamep - a reference to the mddrivename_t of the drive 3369 * for which the possible soft partition size 3370 * is to be returned 3371 * number_of_sps - the desired number of soft partitions 3372 * OUTPUT: blkcnt_t return value 3373 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3374 * PURPOSE: returns the maximum possible size of each of a given number of 3375 * soft partitions of equal size that can be created on a drive 3376 * if the entire drive is soft partitioned 3377 */ 3378 blkcnt_t 3379 meta_sp_get_possible_sp_size_on_drive( 3380 mdsetname_t *mdsetnamep, 3381 mddrivename_t *mddrivenamep, 3382 int number_of_sps 3383 ) 3384 { 3385 blkcnt_t free_blocks; 3386 blkcnt_t sp_size; 3387 boolean_t succeeded; 3388 3389 sp_size = 0; 3390 if (number_of_sps > 0) { 3391 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3392 mddrivenamep); 3393 sp_size = free_blocks / number_of_sps; 3394 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3395 mddrivenamep, 3396 number_of_sps, sp_size); 3397 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3398 /* 3399 * To compensate for space that may have been 3400 * occupied by watermarks, reduce sp_size by a 3401 * number of blocks equal to the number of soft 3402 * partitions desired, and test again to see 3403 * whether the desired number of soft partitions 3404 * can be created. 
3405 */ 3406 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3407 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3408 mddrivenamep, 3409 number_of_sps, sp_size); 3410 } 3411 if (sp_size < 0) { 3412 sp_size = 0; 3413 } 3414 } 3415 return (sp_size); 3416 } 3417 3418 /* 3419 * ************************************************************************** 3420 * Unit Structure Manipulation Functions * 3421 * ************************************************************************** 3422 */ 3423 3424 /* 3425 * FUNCTION: meta_sp_fillextarray() 3426 * INPUT: mp - the unit structure to fill 3427 * extlist - the list of extents to fill with 3428 * OUTPUT: none 3429 * RETURNS: void 3430 * PURPOSE: fills in the unit structure extent list with the extents 3431 * specified by extlist. Only extents in extlist with the 3432 * EXTFLG_UPDATE flag are changed in the unit structure, 3433 * and the index into the unit structure is the sequence 3434 * number in the extent list. After all of the nodes have 3435 * been updated the virtual offsets in the unit structure 3436 * are updated to reflect the new lengths. 
3437 */ 3438 static void 3439 meta_sp_fillextarray( 3440 mp_unit_t *mp, 3441 sp_ext_node_t *extlist 3442 ) 3443 { 3444 int i; 3445 sp_ext_node_t *ext; 3446 sp_ext_offset_t curvoff = 0LL; 3447 3448 assert(mp != NULL); 3449 3450 /* go through the allocation list and fill in our unit structure */ 3451 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3452 if ((ext->ext_type == EXTTYP_ALLOC) && 3453 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3454 mp->un_ext[ext->ext_seq].un_poff = 3455 ext->ext_offset + MD_SP_WMSIZE; 3456 mp->un_ext[ext->ext_seq].un_len = 3457 ext->ext_length - MD_SP_WMSIZE; 3458 } 3459 } 3460 3461 for (i = 0; i < mp->un_numexts; i++) { 3462 assert(mp->un_ext[i].un_poff != 0); 3463 assert(mp->un_ext[i].un_len != 0); 3464 mp->un_ext[i].un_voff = curvoff; 3465 curvoff += mp->un_ext[i].un_len; 3466 } 3467 } 3468 3469 /* 3470 * FUNCTION: meta_sp_createunit() 3471 * INPUT: np - the name of the device to create a unit structure for 3472 * compnp - the name of the device the soft partition is on 3473 * extlist - the extent list to populate the new unit with 3474 * numexts - the number of extents in the extent list 3475 * len - the total size of the soft partition (sectors) 3476 * status - the initial status of the unit structure 3477 * OUTPUT: ep - return error pointer 3478 * RETURNS: mp_unit_t * - the new unit structure. 3479 * PURPOSE: allocates and fills in a new soft partition unit 3480 * structure to be passed to the soft partitioning driver 3481 * for creation. 
3482 */ 3483 static mp_unit_t * 3484 meta_sp_createunit( 3485 mdname_t *np, 3486 mdname_t *compnp, 3487 sp_ext_node_t *extlist, 3488 int numexts, 3489 sp_ext_length_t len, 3490 sp_status_t status, 3491 md_error_t *ep 3492 ) 3493 { 3494 mp_unit_t *mp; 3495 uint_t ms_size; 3496 3497 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3498 (numexts * sizeof (mp->un_ext[0])); 3499 3500 mp = Zalloc(ms_size); 3501 3502 /* fill in fields in common unit structure */ 3503 mp->c.un_type = MD_METASP; 3504 mp->c.un_size = ms_size; 3505 MD_SID(mp) = meta_getminor(np->dev); 3506 mp->c.un_total_blocks = len; 3507 mp->c.un_actual_tb = len; 3508 3509 /* set up geometry */ 3510 (void) meta_sp_setgeom(np, compnp, mp, ep); 3511 3512 /* if we're building on metadevice we can't parent */ 3513 if (metaismeta(compnp)) 3514 MD_CAPAB(mp) = MD_CANT_PARENT; 3515 else 3516 MD_CAPAB(mp) = MD_CAN_PARENT; 3517 3518 /* fill soft partition-specific fields */ 3519 mp->un_dev = compnp->dev; 3520 mp->un_key = compnp->key; 3521 3522 /* mdname_t start_blk field is not 64-bit! */ 3523 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3524 mp->un_status = status; 3525 mp->un_numexts = numexts; 3526 mp->un_length = len; 3527 3528 /* fill in the extent array */ 3529 meta_sp_fillextarray(mp, extlist); 3530 3531 return (mp); 3532 } 3533 3534 /* 3535 * FUNCTION: meta_sp_updateunit() 3536 * INPUT: np - name structure for the metadevice being updated 3537 * old_un - the original unit structure that is being updated 3538 * extlist - the extent list to populate the new unit with 3539 * grow_len - the amount by which the partition is being grown 3540 * numexts - the number of extents in the extent list 3541 * ep - return error pointer 3542 * OUTPUT: none 3543 * RETURNS: mp_unit_t * - the updated unit structure 3544 * PURPOSE: allocates and fills in a new soft partition unit structure to 3545 * be passed to the soft partitioning driver for creation. 
The 3546 * old unit structure is first copied in, and then the updated 3547 * extents are changed in the new unit structure. This is 3548 * typically used when the size of an existing unit is changed. 3549 */ 3550 static mp_unit_t * 3551 meta_sp_updateunit( 3552 mdname_t *np, 3553 mp_unit_t *old_un, 3554 sp_ext_node_t *extlist, 3555 sp_ext_length_t grow_len, 3556 int numexts, 3557 md_error_t *ep 3558 ) 3559 { 3560 mp_unit_t *new_un; 3561 sp_ext_length_t new_len; 3562 uint_t new_size; 3563 3564 assert(old_un != NULL); 3565 assert(extlist != NULL); 3566 3567 /* allocate new unit structure and copy in old unit */ 3568 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3569 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3570 new_len = old_un->un_length + grow_len; 3571 new_un = Zalloc(new_size); 3572 bcopy(old_un, new_un, old_un->c.un_size); 3573 3574 /* update size and geometry information */ 3575 new_un->c.un_size = new_size; 3576 new_un->un_length = new_len; 3577 new_un->c.un_total_blocks = new_len; 3578 new_un->c.un_actual_tb = new_len; 3579 if (meta_adjust_geom((md_unit_t *)new_un, np, 3580 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3581 0, ep) != 0) { 3582 Free(new_un); 3583 return (NULL); 3584 } 3585 3586 /* update extent information */ 3587 new_un->un_numexts += numexts; 3588 3589 meta_sp_fillextarray(new_un, extlist); 3590 3591 return (new_un); 3592 } 3593 3594 /* 3595 * FUNCTION: meta_get_sp() 3596 * INPUT: sp - the set name for the device to get 3597 * np - the name of the device to get 3598 * OUTPUT: ep - return error pointer 3599 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3600 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3601 * for the named device. Just a wrapper for meta_get_sp_common(). 
3602 */ 3603 md_sp_t * 3604 meta_get_sp( 3605 mdsetname_t *sp, 3606 mdname_t *np, 3607 md_error_t *ep 3608 ) 3609 { 3610 return (meta_get_sp_common(sp, np, 0, ep)); 3611 } 3612 3613 /* 3614 * FUNCTION: meta_get_sp_common() 3615 * INPUT: sp - the set name for the device to get 3616 * np - the name of the device to get 3617 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3618 * OUTPUT: ep - return error pointer 3619 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3620 * NULL if np is not a soft partition 3621 * PURPOSE: common routine for fetching a soft partition unit structure 3622 */ 3623 md_sp_t * 3624 meta_get_sp_common( 3625 mdsetname_t *sp, 3626 mdname_t *np, 3627 int fast, 3628 md_error_t *ep 3629 ) 3630 { 3631 mddrivename_t *dnp = np->drivenamep; 3632 char *miscname; 3633 mp_unit_t *mp; 3634 md_sp_t *msp; 3635 int i; 3636 3637 /* must have set */ 3638 assert(sp != NULL); 3639 3640 /* short circuit */ 3641 if (dnp->unitp != NULL) { 3642 if (dnp->unitp->type != MD_METASP) 3643 return (NULL); 3644 return ((md_sp_t *)dnp->unitp); 3645 } 3646 /* get miscname and unit */ 3647 if ((miscname = metagetmiscname(np, ep)) == NULL) 3648 return (NULL); 3649 3650 if (strcmp(miscname, MD_SP) != 0) { 3651 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3652 return (NULL); 3653 } 3654 3655 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3656 return (NULL); 3657 3658 assert(mp->c.un_type == MD_METASP); 3659 3660 /* allocate soft partition */ 3661 msp = Zalloc(sizeof (*msp)); 3662 3663 /* get the common information */ 3664 msp->common.namep = np; 3665 msp->common.type = mp->c.un_type; 3666 msp->common.state = mp->c.un_status; 3667 msp->common.capabilities = mp->c.un_capabilities; 3668 msp->common.parent = mp->c.un_parent; 3669 msp->common.size = mp->c.un_total_blocks; 3670 msp->common.user_flags = mp->c.un_user_flags; 3671 msp->common.revision = mp->c.un_revision; 3672 3673 /* get soft partition information */ 3674 if 
((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3675 goto out; 3676 3677 /* 3678 * Fill in the key and the start block. Note that the start 3679 * block in the unit structure is 64 bits but the name pointer 3680 * only supports 32 bits. 3681 */ 3682 msp->compnamep->key = mp->un_key; 3683 msp->compnamep->start_blk = mp->un_start_blk; 3684 3685 /* fill in status field */ 3686 msp->status = mp->un_status; 3687 3688 /* allocate the extents */ 3689 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3690 msp->ext.ext_len = mp->un_numexts; 3691 3692 /* do the extents for this soft partition */ 3693 for (i = 0; i < mp->un_numexts; i++) { 3694 struct mp_ext *mde = &mp->un_ext[i]; 3695 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3696 3697 extp->voff = mde->un_voff; 3698 extp->poff = mde->un_poff; 3699 extp->len = mde->un_len; 3700 } 3701 3702 /* cleanup, return success */ 3703 Free(mp); 3704 dnp->unitp = (md_common_t *)msp; 3705 return (msp); 3706 3707 out: 3708 /* clean up and return error */ 3709 Free(mp); 3710 Free(msp); 3711 return (NULL); 3712 } 3713 3714 3715 /* 3716 * FUNCTION: meta_init_sp() 3717 * INPUT: spp - the set name for the new device 3718 * argc - the remaining argument count for the metainit cmdline 3719 * argv - the remainder of the unparsed command line 3720 * options - global options parsed by metainit 3721 * OUTPUT: ep - return error pointer 3722 * RETURNS: int - -1 failure, 0 success 3723 * PURPOSE: provides the command line parsing and name management overhead 3724 * for creating a new soft partition. Ultimately this calls 3725 * meta_create_sp() which does the real work of allocating space 3726 * for the new soft partition. 
3727 */ 3728 int 3729 meta_init_sp( 3730 mdsetname_t **spp, 3731 int argc, 3732 char *argv[], 3733 mdcmdopts_t options, 3734 md_error_t *ep 3735 ) 3736 { 3737 char *compname = NULL; 3738 mdname_t *spcompnp = NULL; /* name of component volume */ 3739 char *devname = argv[0]; /* unit name */ 3740 mdname_t *np = NULL; /* name of soft partition */ 3741 md_sp_t *msp = NULL; 3742 int c; 3743 int old_optind; 3744 sp_ext_length_t len = 0LL; 3745 int rval = -1; 3746 uint_t seq; 3747 int oflag; 3748 int failed; 3749 mddrivename_t *dnp = NULL; 3750 sp_ext_length_t alignment = 0LL; 3751 sp_ext_node_t *extlist = NULL; 3752 3753 assert(argc > 0); 3754 3755 /* expect sp name, -p, optional -e, compname, and size parameters */ 3756 /* grab soft partition name */ 3757 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3758 goto out; 3759 3760 /* see if it exists already */ 3761 if (metagetmiscname(np, ep) != NULL) { 3762 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3763 meta_getminor(np->dev), devname); 3764 goto out; 3765 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3766 goto out; 3767 } else { 3768 mdclrerror(ep); 3769 } 3770 --argc, ++argv; 3771 3772 if (argc == 0) 3773 goto syntax; 3774 3775 /* grab -p */ 3776 if (strcmp(argv[0], "-p") != 0) 3777 goto syntax; 3778 --argc, ++argv; 3779 3780 if (argc == 0) 3781 goto syntax; 3782 3783 /* see if -e is there */ 3784 if (strcmp(argv[0], "-e") == 0) { 3785 /* use the whole disk */ 3786 options |= MDCMD_USE_WHOLE_DISK; 3787 --argc, ++argv; 3788 } 3789 3790 if (argc == 0) 3791 goto syntax; 3792 3793 /* get component name */ 3794 compname = Strdup(argv[0]); 3795 3796 if (options & MDCMD_USE_WHOLE_DISK) { 3797 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3798 goto out; 3799 } 3800 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3801 goto out; 3802 } 3803 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3804 goto out; 3805 } 3806 assert(*spp != NULL); 3807 3808 if (!(options & MDCMD_NOLOCK)) { 3809 /* grab set lock */ 3810 if (meta_lock(*spp, TRUE, ep)) 3811 goto out; 3812 3813 if (meta_check_ownership(*spp, ep) != 0) 3814 goto out; 3815 } 3816 3817 /* allocate the soft partition */ 3818 msp = Zalloc(sizeof (*msp)); 3819 3820 /* setup common */ 3821 msp->common.namep = np; 3822 msp->common.type = MD_METASP; 3823 3824 compname = spcompnp->cname; 3825 3826 assert(spcompnp->rname != NULL); 3827 --argc, ++argv; 3828 3829 if (argc == 0) { 3830 goto syntax; 3831 } 3832 3833 if (*argv[0] == '-') { 3834 /* 3835 * parse any other command line options, this includes 3836 * the recovery options -o and -b. 
The special thing 3837 * with these options is that the len needs to be 3838 * kept track of otherwise when the geometry of the 3839 * "device" is built it will create an invalid geometry 3840 */ 3841 old_optind = optind = 0; 3842 opterr = 0; 3843 oflag = 0; 3844 seq = 0; 3845 failed = 0; 3846 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3847 sp_ext_offset_t offset; 3848 sp_ext_length_t length; 3849 longlong_t tmp_size; 3850 3851 switch (c) { 3852 case 'A': /* data alignment */ 3853 if (meta_sp_parsesizestring(optarg, 3854 &alignment) == -1) { 3855 failed = 1; 3856 } 3857 break; 3858 case 'o': /* offset in the partition */ 3859 if (oflag == 1) { 3860 failed = 1; 3861 } else { 3862 tmp_size = atoll(optarg); 3863 if (tmp_size <= 0) { 3864 failed = 1; 3865 } else { 3866 oflag = 1; 3867 options |= MDCMD_DIRECT; 3868 3869 offset = tmp_size; 3870 } 3871 } 3872 3873 break; 3874 case 'b': /* number of blocks */ 3875 if (oflag == 0) { 3876 failed = 1; 3877 } else { 3878 tmp_size = atoll(optarg); 3879 if (tmp_size <= 0) { 3880 failed = 1; 3881 } else { 3882 oflag = 0; 3883 3884 length = tmp_size; 3885 3886 /* we have a pair of values */ 3887 meta_sp_list_insert(*spp, np, 3888 &extlist, offset, 3889 length, EXTTYP_ALLOC, 3890 seq++, EXTFLG_UPDATE, 3891 meta_sp_cmp_by_offset); 3892 len += length; 3893 } 3894 } 3895 3896 break; 3897 default: 3898 argc -= old_optind; 3899 argv += old_optind; 3900 goto options; 3901 } 3902 3903 if (failed) { 3904 argc -= old_optind; 3905 argv += old_optind; 3906 goto syntax; 3907 } 3908 3909 old_optind = optind; 3910 } 3911 argc -= optind; 3912 argv += optind; 3913 3914 /* 3915 * Must have matching pairs of -o and -b flags 3916 */ 3917 if (oflag != 0) 3918 goto syntax; 3919 3920 /* 3921 * Can't specify both layout (indicated indirectly by 3922 * len being set by thye -o/-b cases above) AND 3923 * alignment 3924 */ 3925 if ((len > 0LL) && (alignment > 0LL)) 3926 goto syntax; 3927 3928 /* 3929 * sanity check the allocation list 3930 */ 3931 
if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3932 goto syntax; 3933 } 3934 3935 if (len == 0LL) { 3936 if (argc == 0) 3937 goto syntax; 3938 if (meta_sp_parsesize(argv[0], &len) == -1) 3939 goto syntax; 3940 --argc, ++argv; 3941 } 3942 3943 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3944 msp->ext.ext_val->len = len; 3945 msp->compnamep = spcompnp; 3946 3947 /* we should be at the end */ 3948 if (argc != 0) 3949 goto syntax; 3950 3951 /* create soft partition */ 3952 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3953 goto out; 3954 rval = 0; 3955 3956 /* let em know */ 3957 if (options & MDCMD_PRINT) { 3958 (void) printf(dgettext(TEXT_DOMAIN, 3959 "%s: Soft Partition is setup\n"), 3960 devname); 3961 (void) fflush(stdout); 3962 } 3963 goto out; 3964 3965 syntax: 3966 /* syntax error */ 3967 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3968 goto out; 3969 3970 options: 3971 /* options error */ 3972 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3973 goto out; 3974 3975 out: 3976 if (msp != NULL) { 3977 if (msp->ext.ext_val != NULL) { 3978 Free(msp->ext.ext_val); 3979 } 3980 Free(msp); 3981 } 3982 3983 return (rval); 3984 } 3985 3986 /* 3987 * FUNCTION: meta_free_sp() 3988 * INPUT: msp - the soft partition unit to free 3989 * OUTPUT: none 3990 * RETURNS: void 3991 * PURPOSE: provides an interface from the rest of libmeta for freeing a 3992 * soft partition unit 3993 */ 3994 void 3995 meta_free_sp(md_sp_t *msp) 3996 { 3997 Free(msp); 3998 } 3999 4000 /* 4001 * FUNCTION: meta_sp_issp() 4002 * INPUT: sp - the set name to check 4003 * np - the name to check 4004 * OUTPUT: ep - return error pointer 4005 * RETURNS: int - 0 means sp,np is a soft partition 4006 * 1 means sp,np is not a soft partition 4007 * PURPOSE: determines whether the given device is a soft partition 4008 * device. This is called by other metadevice check routines. 
4009 */ 4010 int 4011 meta_sp_issp( 4012 mdsetname_t *sp, 4013 mdname_t *np, 4014 md_error_t *ep 4015 ) 4016 { 4017 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 4018 return (1); 4019 4020 return (0); 4021 } 4022 4023 /* 4024 * FUNCTION: meta_check_sp() 4025 * INPUT: sp - the set name to check 4026 * msp - the unit structure to check 4027 * options - creation options 4028 * OUTPUT: repart_options - options to be passed to 4029 * meta_repartition_drive() 4030 * ep - return error pointer 4031 * RETURNS: int - 0 ok to create on this component 4032 * -1 error or not ok to create on this component 4033 * PURPOSE: Checks to determine whether the rules for creation of 4034 * soft partitions allow creation of a soft partition on 4035 * the device described by the mdname_t structure referred 4036 * to by msp->compnamep. 4037 * 4038 * NOTE: Does NOT check to determine whether the extents 4039 * described in the md_sp_t structure referred to by 4040 * msp will fit on the device described by the mdname_t 4041 * structure located at msp->compnamep. 4042 */ 4043 static int 4044 meta_check_sp( 4045 mdsetname_t *sp, 4046 md_sp_t *msp, 4047 mdcmdopts_t options, 4048 int *repart_options, 4049 md_error_t *ep 4050 ) 4051 { 4052 md_common_t *mdp; 4053 mdname_t *compnp = msp->compnamep; 4054 uint_t slice; 4055 mddrivename_t *dnp; 4056 mdname_t *slicenp; 4057 mdvtoc_t *vtocp; 4058 4059 /* make sure it is in the set */ 4060 if (meta_check_inset(sp, compnp, ep) != 0) 4061 return (-1); 4062 4063 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4064 uint_t rep_slice; 4065 4066 /* 4067 * check to make sure we can partition this drive. 4068 * we cannot continue if any of the following are 4069 * true: 4070 * The drive is a metadevice. 4071 * The drive contains a mounted slice. 4072 * The drive contains a slice being swapped to. 4073 * The drive contains slices which are part of other 4074 * metadevices. 4075 * The drive contains a metadb. 
4076 */ 4077 if (metaismeta(compnp)) 4078 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4079 compnp->cname)); 4080 4081 assert(compnp->drivenamep != NULL); 4082 4083 /* 4084 * ensure that we have slice 0 since the disk will be 4085 * repartitioned in the USE_WHOLE_DISK case. this check 4086 * is redundant unless the user incorrectly specifies a 4087 * a fully qualified drive AND slice name (i.e., 4088 * /dev/dsk/cXtXdXsX), which will be incorrectly 4089 * recognized as a drive name by the metaname code. 4090 */ 4091 4092 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4093 return (-1); 4094 if (slice != MD_SLICE0) 4095 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4096 4097 dnp = compnp->drivenamep; 4098 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4099 return (-1); 4100 4101 for (slice = 0; slice < vtocp->nparts; slice++) { 4102 4103 /* only check if the slice really exists */ 4104 if (vtocp->parts[slice].size == 0) 4105 continue; 4106 4107 slicenp = metaslicename(dnp, slice, ep); 4108 if (slicenp == NULL) 4109 return (-1); 4110 4111 /* check to ensure that it is not already in use */ 4112 if (meta_check_inuse(sp, 4113 slicenp, MDCHK_INUSE, ep) != 0) { 4114 return (-1); 4115 } 4116 4117 /* 4118 * Up to this point, tests are applied to all 4119 * slices uniformly. 4120 */ 4121 4122 if (slice == rep_slice) { 4123 /* 4124 * Tests inside the body of this 4125 * conditional are applied only to 4126 * slice seven. 4127 */ 4128 if (meta_check_inmeta(sp, slicenp, 4129 options | MDCHK_ALLOW_MDDB | 4130 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4131 return (-1); 4132 4133 /* 4134 * For slice seven, a metadb is NOT an 4135 * automatic failure. It merely means 4136 * that we're not allowed to muck 4137 * about with the partitioning of that 4138 * slice. We indicate this by masking 4139 * in the MD_REPART_LEAVE_REP flag. 
4140 */ 4141 if (metahasmddb(sp, slicenp, ep)) { 4142 assert(repart_options != 4143 NULL); 4144 *repart_options |= 4145 MD_REPART_LEAVE_REP; 4146 } 4147 4148 /* 4149 * Skip the remaining tests for slice 4150 * seven 4151 */ 4152 continue; 4153 } 4154 4155 /* 4156 * Tests below this point will be applied to 4157 * all slices EXCEPT for the replica slice. 4158 */ 4159 4160 4161 /* check if component is in a metadevice */ 4162 if (meta_check_inmeta(sp, slicenp, options, 0, 4163 -1, ep) != 0) 4164 return (-1); 4165 4166 /* check to see if component has a metadb */ 4167 if (metahasmddb(sp, slicenp, ep)) 4168 return (mddeverror(ep, MDE_HAS_MDDB, 4169 slicenp->dev, slicenp->cname)); 4170 } 4171 /* 4172 * This should be all of the testing necessary when 4173 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4174 * meta_check_sp() is oriented towards component 4175 * arguments instead of disks. 4176 */ 4177 goto meta_check_sp_ok; 4178 4179 } 4180 4181 /* check to ensure that it is not already in use */ 4182 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4183 return (-1); 4184 } 4185 4186 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4187 4188 /* 4189 * The component can have one or more soft partitions on it 4190 * already, but can't be part of any other type of metadevice, 4191 * so if it is used for a metadevice, but the metadevice 4192 * isn't a soft partition, return failure. 4193 */ 4194 4195 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4196 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4197 return (-1); 4198 } 4199 } else { /* handle metadevices */ 4200 /* get underlying unit & check capabilities */ 4201 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4202 return (-1); 4203 4204 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4205 (! 
(mdp->capabilities & MD_CAN_SP))) 4206 return (mdmderror(ep, MDE_INVAL_UNIT, 4207 meta_getminor(compnp->dev), compnp->cname)); 4208 } 4209 4210 meta_check_sp_ok: 4211 mdclrerror(ep); 4212 return (0); 4213 } 4214 4215 /* 4216 * FUNCTION: meta_create_sp() 4217 * INPUT: sp - the set name to create in 4218 * msp - the unit structure to create 4219 * oblist - an optional list of requested extents (-o/-b options) 4220 * options - creation options 4221 * alignment - data alignment 4222 * OUTPUT: ep - return error pointer 4223 * RETURNS: int - 0 success, -1 error 4224 * PURPOSE: does most of the work for creating a soft partition. If 4225 * metainit -p -e was used, first partition the drive. Then 4226 * create an extent list based on the existing soft partitions 4227 * and assume all space not used by them is free. Storage for 4228 * the new soft partition is allocated from the free extents 4229 * based on the length specified on the command line or the 4230 * oblist passed in. The unit structure is then committed and 4231 * the watermarks are updated. Finally, the status is changed to 4232 * Okay and the process is complete. 
4233 */ 4234 static int 4235 meta_create_sp( 4236 mdsetname_t *sp, 4237 md_sp_t *msp, 4238 sp_ext_node_t *oblist, 4239 mdcmdopts_t options, 4240 sp_ext_length_t alignment, 4241 md_error_t *ep 4242 ) 4243 { 4244 mdname_t *np = msp->common.namep; 4245 mdname_t *compnp = msp->compnamep; 4246 mp_unit_t *mp = NULL; 4247 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4248 md_set_params_t set_params; 4249 int rval = -1; 4250 diskaddr_t comp_size; 4251 diskaddr_t sp_start; 4252 sp_ext_node_t *extlist = NULL; 4253 int numexts = 0; /* number of extents */ 4254 int count = 0; 4255 int committed = 0; 4256 int repart_options = MD_REPART_FORCE; 4257 int create_flag = MD_CRO_32BIT; 4258 4259 md_set_desc *sd; 4260 mm_unit_t *mm; 4261 md_set_mmown_params_t *ownpar = NULL; 4262 int comp_is_mirror = 0; 4263 4264 /* validate soft partition */ 4265 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4266 return (-1); 4267 4268 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4269 if ((options & MDCMD_DOIT) != 0) { 4270 if (meta_repartition_drive(sp, 4271 compnp->drivenamep, 4272 repart_options, 4273 NULL, /* Don't return the VTOC */ 4274 ep) != 0) 4275 4276 return (-1); 4277 } else { 4278 /* 4279 * If -n and -e are both specified, it doesn't make 4280 * sense to continue without actually partitioning 4281 * the drive. 4282 */ 4283 return (0); 4284 } 4285 } 4286 4287 /* populate the start_blk field of the component name */ 4288 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4289 MD_DISKADDR_ERROR) { 4290 rval = -1; 4291 goto out; 4292 } 4293 4294 if (options & MDCMD_DOIT) { 4295 /* store name in namespace */ 4296 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4297 rval = -1; 4298 goto out; 4299 } 4300 } 4301 4302 /* 4303 * Get a list of the soft partitions that currently reside on 4304 * the component. 
We should ALWAYS force reload the cache, 4305 * because if this is a single creation, there will not BE a 4306 * cached list, and if we're using the md.tab, we must rebuild 4307 * the list because it won't contain the previous (if any) 4308 * soft partition. 4309 */ 4310 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4311 if (count < 0) { 4312 /* error occured */ 4313 rval = -1; 4314 goto out; 4315 } 4316 4317 /* 4318 * get the size of the underlying device. if the size is smaller 4319 * than or equal to the watermark size, we know there isn't 4320 * enough space. 4321 */ 4322 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4323 rval = -1; 4324 goto out; 4325 } else if (comp_size <= MD_SP_WMSIZE) { 4326 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4327 rval = -1; 4328 goto out; 4329 } 4330 /* 4331 * seed extlist with reserved space at the beginning of the volume and 4332 * enough space for the end watermark. The end watermark always gets 4333 * updated, but if the underlying device changes size it may not be 4334 * pointed to until the extent before it is updated. Since the 4335 * end of the reserved space is where the first watermark starts, 4336 * the reserved extent should never be marked for updating. 
4337 */ 4338 4339 meta_sp_list_insert(NULL, NULL, &extlist, 4340 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4341 meta_sp_list_insert(NULL, NULL, &extlist, 4342 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4343 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4344 4345 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4346 rval = -1; 4347 goto out; 4348 } 4349 4350 metafreenamelist(spnlp); 4351 4352 if (getenv(META_SP_DEBUG)) { 4353 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4354 meta_sp_list_dump(extlist); 4355 } 4356 4357 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4358 4359 /* get extent list from -o/-b options or from free space */ 4360 if (options & MDCMD_DIRECT) { 4361 if (getenv(META_SP_DEBUG)) { 4362 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4363 meta_sp_list_dump(oblist); 4364 } 4365 4366 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4367 if (numexts == -1) { 4368 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4369 rval = -1; 4370 goto out; 4371 } 4372 } else { 4373 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4374 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4375 meta_sp_get_default_alignment(sp, compnp, ep)); 4376 if (numexts == -1) { 4377 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4378 rval = -1; 4379 goto out; 4380 } 4381 } 4382 4383 assert(extlist != NULL); 4384 4385 /* create soft partition */ 4386 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4387 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4388 4389 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4390 4391 /* if we're not doing anything (metainit -n), return success */ 4392 if (! 
(options & MDCMD_DOIT)) { 4393 rval = 0; /* success */ 4394 goto out; 4395 } 4396 4397 (void) memset(&set_params, 0, sizeof (set_params)); 4398 4399 if (create_flag == MD_CRO_64BIT) { 4400 mp->c.un_revision |= MD_64BIT_META_DEV; 4401 set_params.options = MD_CRO_64BIT; 4402 } else { 4403 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4404 set_params.options = MD_CRO_32BIT; 4405 } 4406 4407 if (getenv(META_SP_DEBUG)) { 4408 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4409 meta_sp_printunit(mp); 4410 } 4411 4412 /* 4413 * Check to see if we're trying to create a partition on a mirror. If so 4414 * we may have to enforce an ownership change before writing the 4415 * watermark out. 4416 */ 4417 if (metaismeta(compnp)) { 4418 char *miscname; 4419 4420 miscname = metagetmiscname(compnp, ep); 4421 if (miscname != NULL) 4422 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4423 else 4424 comp_is_mirror = 0; 4425 } else { 4426 comp_is_mirror = 0; 4427 } 4428 4429 /* 4430 * For a multi-node environment we have to ensure that the master 4431 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4432 * If the master does not own the device we will deadlock as the 4433 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4434 * ownership change that will block as the MD_IOCSET is still in 4435 * progress. To close this window we force an owner change to occur 4436 * before issuing the MD_IOCSET. We cannot simply open the device and 4437 * write to it as this will only work for the first soft-partition 4438 * creation. 
4439 */ 4440 4441 if (comp_is_mirror && !metaislocalset(sp)) { 4442 4443 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4444 rval = -1; 4445 goto out; 4446 } 4447 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4448 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 4449 if (mm == NULL) { 4450 rval = -1; 4451 goto out; 4452 } else { 4453 rval = meta_mn_change_owner(&ownpar, sp->setno, 4454 meta_getminor(compnp->dev), 4455 sd->sd_mn_mynode->nd_nodeid, 4456 MD_MN_MM_PREVENT_CHANGE | 4457 MD_MN_MM_SPAWN_THREAD); 4458 if (rval == -1) 4459 goto out; 4460 } 4461 } 4462 } 4463 4464 set_params.mnum = MD_SID(mp); 4465 set_params.size = mp->c.un_size; 4466 set_params.mdp = (uintptr_t)mp; 4467 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4468 4469 /* first phase of commit. */ 4470 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4471 np->cname) != 0) { 4472 (void) mdstealerror(ep, &set_params.mde); 4473 rval = -1; 4474 goto out; 4475 } 4476 4477 /* we've successfully committed the record */ 4478 committed = 1; 4479 4480 /* write watermarks */ 4481 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4482 rval = -1; 4483 goto out; 4484 } 4485 4486 /* 4487 * Allow mirror ownership to change. If we don't succeed in this 4488 * ioctl it isn't fatal, but the cluster will probably hang fairly 4489 * soon as the mirror owner won't change. 
However, we have 4490 * successfully written the watermarks out to the device so the 4491 * softpart creation has succeeded 4492 */ 4493 if (ownpar) { 4494 (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum, 4495 ownpar->d.owner, 4496 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 4497 } 4498 4499 /* second phase of commit, set status to MD_SP_OK */ 4500 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4501 rval = -1; 4502 goto out; 4503 } 4504 rval = 0; 4505 out: 4506 Free(mp); 4507 if (ownpar) 4508 Free(ownpar); 4509 4510 if (extlist != NULL) 4511 meta_sp_list_free(&extlist); 4512 4513 if (rval != 0 && keynlp != NULL && committed != 1) 4514 (void) del_key_names(sp, keynlp, NULL); 4515 4516 metafreenamelist(keynlp); 4517 4518 return (rval); 4519 } 4520 4521 /* 4522 * ************************************************************************** 4523 * Reset (metaclear) Functions * 4524 * ************************************************************************** 4525 */ 4526 4527 /* 4528 * FUNCTION: meta_sp_reset_common() 4529 * INPUT: sp - the set name of the device to reset 4530 * np - the name of the device to reset 4531 * msp - the unit structure to reset 4532 * options - metaclear options 4533 * OUTPUT: ep - return error pointer 4534 * RETURNS: int - 0 success, -1 error 4535 * PURPOSE: "resets", or more accurately deletes, the soft partition 4536 * specified. First the state is set to "deleting" and then the 4537 * watermarks are all cleared out. Once the watermarks have been 4538 * updated, the unit structure is deleted from the metadb. 
4539 */ 4540 static int 4541 meta_sp_reset_common( 4542 mdsetname_t *sp, 4543 mdname_t *np, 4544 md_sp_t *msp, 4545 md_sp_reset_t reset_params, 4546 mdcmdopts_t options, 4547 md_error_t *ep 4548 ) 4549 { 4550 char *miscname; 4551 int rval = -1; 4552 int is_open = 0; 4553 4554 /* make sure that nobody owns us */ 4555 if (MD_HAS_PARENT(msp->common.parent)) 4556 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4557 np->cname)); 4558 4559 /* make sure that the soft partition isn't open */ 4560 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4561 return (-1); 4562 else if (is_open) 4563 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4564 np->cname)); 4565 4566 /* get miscname */ 4567 if ((miscname = metagetmiscname(np, ep)) == NULL) 4568 return (-1); 4569 4570 /* fill in reset params */ 4571 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4572 reset_params.mnum = meta_getminor(np->dev); 4573 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4574 4575 /* 4576 * clear soft partition - phase one. 4577 * place the soft partition into the "delete pending" state. 4578 */ 4579 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4580 return (-1); 4581 4582 /* 4583 * Now clear the watermarks. If the force flag is specified, 4584 * ignore any errors writing the watermarks and delete the unit 4585 * structure anyway. An error may leave the on-disk format in a 4586 * corrupt state. If force is not specified and we fail here, 4587 * the soft partition will remain in the "delete pending" state. 4588 */ 4589 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4590 ((options & MDCMD_FORCE) == 0)) 4591 goto out; 4592 4593 /* 4594 * clear soft partition - phase two. 4595 * the driver removes the soft partition from the metadb and 4596 * zeros out incore version. 
4597 */ 4598 if (metaioctl(MD_IOCRESET, &reset_params, 4599 &reset_params.mde, np->cname) != 0) { 4600 (void) mdstealerror(ep, &reset_params.mde); 4601 goto out; 4602 } 4603 4604 /* 4605 * Wait for the /dev to be cleaned up. Ignore the return 4606 * value since there's not much we can do. 4607 */ 4608 (void) meta_update_devtree(meta_getminor(np->dev)); 4609 4610 rval = 0; /* success */ 4611 4612 if (options & MDCMD_PRINT) { 4613 (void) printf(dgettext(TEXT_DOMAIN, 4614 "%s: Soft Partition is cleared\n"), 4615 np->cname); 4616 (void) fflush(stdout); 4617 } 4618 4619 /* 4620 * if told to recurse and on a metadevice, then attempt to 4621 * clear the subdevices. Indicate failure if the clear fails. 4622 */ 4623 if ((options & MDCMD_RECURSE) && 4624 (metaismeta(msp->compnamep)) && 4625 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4626 rval = -1; 4627 4628 out: 4629 meta_invalidate_name(np); 4630 return (rval); 4631 } 4632 4633 /* 4634 * FUNCTION: meta_sp_reset() 4635 * INPUT: sp - the set name of the device to reset 4636 * np - the name of the device to reset 4637 * options - metaclear options 4638 * OUTPUT: ep - return error pointer 4639 * RETURNS: int - 0 success, -1 error 4640 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4641 * soft partition. If np is NULL, then soft partitions are 4642 * all deleted at the current level and then recursively deleted. 4643 * Otherwise, if a name is specified either directly or as a 4644 * result of a recursive operation, it deletes only that name. 4645 * Since something sitting under a soft partition may be parented 4646 * to it, we have to reparent that other device to another soft 4647 * partition on the same component if we're deleting the one it's 4648 * parented to. 
4649 */ 4650 int 4651 meta_sp_reset( 4652 mdsetname_t *sp, 4653 mdname_t *np, 4654 mdcmdopts_t options, 4655 md_error_t *ep 4656 ) 4657 { 4658 md_sp_t *msp; 4659 int rval = -1; 4660 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4661 md_sp_reset_t reset_params; 4662 int num_sp; 4663 4664 assert(sp != NULL); 4665 4666 /* reset/delete all soft paritions */ 4667 if (np == NULL) { 4668 /* 4669 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4670 * is incorrect for soft partitions. We want to clear 4671 * all soft partitions at a particular level in the 4672 * metadevice stack before moving to the next level. 4673 * Thus, we clear MDCMD_RECURSE from the options. 4674 */ 4675 options &= ~MDCMD_RECURSE; 4676 4677 /* for each soft partition */ 4678 rval = 0; 4679 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4680 rval = -1; 4681 4682 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4683 np = nlp->namep; 4684 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4685 rval = -1; 4686 break; 4687 } 4688 /* 4689 * meta_reset_all calls us twice to get soft 4690 * partitions at the top and bottom of the stack. 4691 * thus, if we have a parent, we'll get deleted 4692 * on the next call. 4693 */ 4694 if (MD_HAS_PARENT(msp->common.parent)) 4695 continue; 4696 /* 4697 * If this is a multi-node set, we send a series 4698 * of individual metaclear commands. 
4699 */ 4700 if (meta_is_mn_set(sp, ep)) { 4701 if (meta_mn_send_metaclear_command(sp, 4702 np->cname, options, 0, ep) != 0) { 4703 rval = -1; 4704 break; 4705 } 4706 } else { 4707 if (meta_sp_reset(sp, np, options, ep) != 0) { 4708 rval = -1; 4709 break; 4710 } 4711 } 4712 } 4713 /* cleanup return status */ 4714 metafreenamelist(spnlp); 4715 return (rval); 4716 } 4717 4718 /* check the name */ 4719 if (metachkmeta(np, ep) != 0) 4720 return (-1); 4721 4722 /* get the unit structure */ 4723 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4724 return (-1); 4725 4726 /* clear out reset parameters */ 4727 (void) memset(&reset_params, 0, sizeof (reset_params)); 4728 4729 /* if our child is a metadevice, we need to deparent/reparent it */ 4730 if (metaismeta(msp->compnamep)) { 4731 /* get sp's on this component */ 4732 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4733 &spnlp, 1, ep)) <= 0) 4734 /* no sp's on this device. error! */ 4735 return (-1); 4736 else if (num_sp == 1) 4737 /* last sp on this device, so we deparent */ 4738 reset_params.new_parent = MD_NO_PARENT; 4739 else { 4740 /* have to reparent this metadevice */ 4741 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4742 if (meta_getminor(nlp->namep->dev) == 4743 meta_getminor(np->dev)) 4744 continue; 4745 /* 4746 * this isn't the softpart we are deleting, 4747 * so use this device as the new parent. 
4748 */ 4749 reset_params.new_parent = 4750 meta_getminor(nlp->namep->dev); 4751 break; 4752 } 4753 } 4754 metafreenamelist(spnlp); 4755 } 4756 4757 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4758 return (-1); 4759 4760 return (0); 4761 } 4762 4763 /* 4764 * FUNCTION: meta_sp_reset_component() 4765 * INPUT: sp - the set name of the device to reset 4766 * name - the string name of the device to reset 4767 * options - metaclear options 4768 * OUTPUT: ep - return error pointer 4769 * RETURNS: int - 0 success, -1 error 4770 * PURPOSE: provides the ability to delete all soft partitions on a 4771 * specified device (metaclear -p). It first gets all of the 4772 * soft partitions on the component and then deletes each one 4773 * individually. 4774 */ 4775 int 4776 meta_sp_reset_component( 4777 mdsetname_t *sp, 4778 char *name, 4779 mdcmdopts_t options, 4780 md_error_t *ep 4781 ) 4782 { 4783 mdname_t *compnp, *np; 4784 mdnamelist_t *spnlp = NULL; 4785 mdnamelist_t *nlp = NULL; 4786 md_sp_t *msp; 4787 int count; 4788 md_sp_reset_t reset_params; 4789 4790 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4791 return (-1); 4792 4793 /* If we're starting out with no soft partitions, it's an error */ 4794 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4795 if (count == 0) 4796 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4797 else if (count < 0) 4798 return (-1); 4799 4800 /* 4801 * clear all soft partitions on this component. 4802 * NOTE: we reparent underlying metadevices as we go so that 4803 * things stay sane. Also, if we encounter an error, we stop 4804 * and go no further in case recovery might be needed. 
4805 */ 4806 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4807 /* clear out reset parameters */ 4808 (void) memset(&reset_params, 0, sizeof (reset_params)); 4809 4810 /* check the name */ 4811 np = nlp->namep; 4812 4813 if (metachkmeta(np, ep) != 0) { 4814 metafreenamelist(spnlp); 4815 return (-1); 4816 } 4817 4818 /* get the unit structure */ 4819 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4820 metafreenamelist(spnlp); 4821 return (-1); 4822 } 4823 4824 /* have to deparent/reparent metadevices */ 4825 if (metaismeta(compnp)) { 4826 if (nlp->next == NULL) 4827 reset_params.new_parent = MD_NO_PARENT; 4828 else 4829 reset_params.new_parent = 4830 meta_getminor(spnlp->next->namep->dev); 4831 } 4832 4833 /* clear soft partition */ 4834 if (meta_sp_reset_common(sp, np, msp, reset_params, 4835 options, ep) < 0) { 4836 metafreenamelist(spnlp); 4837 return (-1); 4838 } 4839 } 4840 metafreenamelist(spnlp); 4841 return (0); 4842 } 4843 4844 /* 4845 * ************************************************************************** 4846 * Grow (metattach) Functions * 4847 * ************************************************************************** 4848 */ 4849 4850 /* 4851 * FUNCTION: meta_sp_attach() 4852 * INPUT: sp - the set name of the device to attach to 4853 * np - the name of the device to attach to 4854 * addsize - the unparsed string holding the amount of space to add 4855 * options - metattach options 4856 * alignment - data alignment 4857 * OUTPUT: ep - return error pointer 4858 * RETURNS: int - 0 success, -1 error 4859 * PURPOSE: grows a soft partition by reading in the existing unit 4860 * structure and setting its state to Growing, allocating more 4861 * space (similar to meta_create_sp()), updating the watermarks, 4862 * and then writing out the new unit structure in the Okay state. 
4863 */ 4864 int 4865 meta_sp_attach( 4866 mdsetname_t *sp, 4867 mdname_t *np, 4868 char *addsize, 4869 mdcmdopts_t options, 4870 sp_ext_length_t alignment, 4871 md_error_t *ep 4872 ) 4873 { 4874 md_grow_params_t grow_params; 4875 sp_ext_length_t grow_len; /* amount to grow */ 4876 mp_unit_t *mp, *new_un; 4877 mdname_t *compnp = NULL; 4878 4879 sp_ext_node_t *extlist = NULL; 4880 int numexts; 4881 mdnamelist_t *spnlp = NULL; 4882 int count; 4883 md_sp_t *msp; 4884 daddr_t start_block; 4885 4886 /* should have the same set */ 4887 assert(sp != NULL); 4888 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4889 4890 /* check name */ 4891 if (metachkmeta(np, ep) != 0) 4892 return (-1); 4893 4894 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4895 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4896 } 4897 4898 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4899 return (-1); 4900 4901 /* make sure we don't have a parent */ 4902 if (MD_HAS_PARENT(mp->c.un_parent)) { 4903 Free(mp); 4904 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4905 } 4906 4907 if (getenv(META_SP_DEBUG)) { 4908 meta_sp_debug("meta_sp_attach: Unit structure before new " 4909 "space:\n"); 4910 meta_sp_printunit(mp); 4911 } 4912 4913 /* 4914 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4915 * If this was not the case we would suffer the following 4916 * assertion failure: 4917 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4918 * file meta_check.x, line 315 4919 * I guess this is because we have not "seen" this drive before 4920 * and hence hit the failure - this is of course the attach routine 4921 */ 4922 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4923 Free(mp); 4924 return (-1); 4925 } 4926 4927 /* metakeyname does not fill in the key. 
*/ 4928 compnp->key = mp->un_key; 4929 4930 /* work out the space on the component that we are dealing with */ 4931 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4932 4933 /* 4934 * see if the component has been soft partitioned yet, or if an 4935 * error occurred. 4936 */ 4937 if (count == 0) { 4938 Free(mp); 4939 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4940 } else if (count < 0) { 4941 Free(mp); 4942 return (-1); 4943 } 4944 4945 /* 4946 * seed extlist with reserved space at the beginning of the volume and 4947 * enough space for the end watermark. The end watermark always gets 4948 * updated, but if the underlying device changes size it may not be 4949 * pointed to until the extent before it is updated. Since the 4950 * end of the reserved space is where the first watermark starts, 4951 * the reserved extent should never be marked for updating. 4952 */ 4953 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4954 MD_DISKADDR_ERROR) { 4955 Free(mp); 4956 return (-1); 4957 } 4958 4959 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4960 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4961 meta_sp_list_insert(NULL, NULL, &extlist, 4962 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4963 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4964 4965 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4966 Free(mp); 4967 return (-1); 4968 } 4969 4970 metafreenamelist(spnlp); 4971 4972 if (getenv(META_SP_DEBUG)) { 4973 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4974 meta_sp_list_dump(extlist); 4975 } 4976 4977 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4978 4979 assert(mp->un_numexts >= 1); 4980 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4981 mp->un_ext[mp->un_numexts - 1].un_poff, 4982 (alignment > 0) ? 
alignment : 4983 meta_sp_get_default_alignment(sp, compnp, ep)); 4984 4985 if (numexts == -1) { 4986 Free(mp); 4987 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4988 } 4989 4990 /* allocate new unit structure and copy in old unit */ 4991 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4992 grow_len, numexts, ep)) == NULL) { 4993 Free(mp); 4994 return (-1); 4995 } 4996 Free(mp); 4997 4998 /* If running in dryrun mode (-n option), we're done here */ 4999 if ((options & MDCMD_DOIT) == 0) { 5000 if (options & MDCMD_PRINT) { 5001 (void) printf(dgettext(TEXT_DOMAIN, 5002 "%s: Soft Partition would grow\n"), 5003 np->cname); 5004 (void) fflush(stdout); 5005 } 5006 return (0); 5007 } 5008 5009 if (getenv(META_SP_DEBUG)) { 5010 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 5011 meta_sp_printunit(new_un); 5012 } 5013 5014 assert(new_un != NULL); 5015 5016 (void) memset(&grow_params, 0, sizeof (grow_params)); 5017 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 5018 grow_params.options = MD_CRO_64BIT; 5019 new_un->c.un_revision |= MD_64BIT_META_DEV; 5020 } else { 5021 grow_params.options = MD_CRO_32BIT; 5022 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 5023 } 5024 grow_params.mnum = MD_SID(new_un); 5025 grow_params.size = new_un->c.un_size; 5026 grow_params.mdp = (uintptr_t)new_un; 5027 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5028 5029 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5030 np->cname) != 0) { 5031 (void) mdstealerror(ep, &grow_params.mde); 5032 return (-1); 5033 } 5034 5035 /* update all watermarks */ 5036 5037 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5038 return (-1); 5039 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5040 return (-1); 5041 5042 5043 /* second phase of commit, set status to MD_SP_OK */ 5044 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5045 return (-1); 5046 5047 meta_invalidate_name(np); 5048 5049 if (options & MDCMD_PRINT) { 5050 (void) 
printf(dgettext(TEXT_DOMAIN, 5051 "%s: Soft Partition has been grown\n"), 5052 np->cname); 5053 (void) fflush(stdout); 5054 } 5055 5056 return (0); 5057 } 5058 5059 /* 5060 * ************************************************************************** 5061 * Recovery (metarecover) Functions * 5062 * ************************************************************************** 5063 */ 5064 5065 /* 5066 * FUNCTION: meta_recover_sp() 5067 * INPUT: sp - the name of the set we are recovering on 5068 * compnp - name pointer for device we are recovering on 5069 * argc - argument count 5070 * argv - left over arguments not parsed by metarecover command 5071 * options - metarecover options 5072 * OUTPUT: ep - return error pointer 5073 * RETURNS: int - 0 - success, -1 - error 5074 * PURPOSE: parse soft partitioning-specific metarecover options and 5075 * dispatch to the appropriate function to handle recovery. 5076 */ 5077 int 5078 meta_recover_sp( 5079 mdsetname_t *sp, 5080 mdname_t *compnp, 5081 int argc, 5082 char *argv[], 5083 mdcmdopts_t options, 5084 md_error_t *ep 5085 ) 5086 { 5087 md_set_desc *sd; 5088 5089 if (argc > 1) { 5090 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5091 argc, argv); 5092 return (-1); 5093 } 5094 5095 /* 5096 * For a MN set, this operation must be performed on the master 5097 * as it is responsible for maintaining the watermarks 5098 */ 5099 if (!metaislocalset(sp)) { 5100 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5101 return (-1); 5102 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5103 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5104 sd->sd_mn_master_nodenm, NULL, NULL); 5105 return (-1); 5106 } 5107 } 5108 if (argc == 0) { 5109 /* 5110 * if no additional arguments are passed, metarecover should 5111 * validate both on-disk and metadb structures as well as 5112 * checking that both are consistent with each other 5113 */ 5114 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5115 return (-1); 5116 if 
(meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5117 return (-1); 5118 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5119 return (-1); 5120 } else if (strcmp(argv[0], "-d") == 0) { 5121 /* 5122 * Ensure that there is no existing valid record for this 5123 * soft-partition. If there is we have nothing to do. 5124 */ 5125 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5126 return (-1); 5127 /* validate and recover from on-disk structures */ 5128 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5129 return (-1); 5130 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5131 return (-1); 5132 } else if (strcmp(argv[0], "-m") == 0) { 5133 /* validate and recover from metadb structures */ 5134 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5135 return (-1); 5136 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5137 return (-1); 5138 } else { 5139 /* syntax error */ 5140 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5141 argc, argv); 5142 return (-1); 5143 } 5144 5145 return (0); 5146 } 5147 5148 /* 5149 * FUNCTION: meta_sp_display_exthdr() 5150 * INPUT: none 5151 * OUTPUT: none 5152 * RETURNS: void 5153 * PURPOSE: print header line for sp_ext_node_t information. to be used 5154 * in conjunction with meta_sp_display_ext(). 5155 */ 5156 static void 5157 meta_sp_display_exthdr(void) 5158 { 5159 (void) printf("%20s %5s %7s %20s %20s\n", 5160 dgettext(TEXT_DOMAIN, "Name"), 5161 dgettext(TEXT_DOMAIN, "Seq#"), 5162 dgettext(TEXT_DOMAIN, "Type"), 5163 dgettext(TEXT_DOMAIN, "Offset"), 5164 dgettext(TEXT_DOMAIN, "Length")); 5165 } 5166 5167 5168 /* 5169 * FUNCTION: meta_sp_display_ext() 5170 * INPUT: ext - extent to display 5171 * OUTPUT: none 5172 * RETURNS: void 5173 * PURPOSE: print selected fields from sp_ext_node_t. 
5174 */ 5175 static void 5176 meta_sp_display_ext(sp_ext_node_t *ext) 5177 { 5178 /* print extent information */ 5179 if (ext->ext_namep != NULL) 5180 (void) printf("%20s ", ext->ext_namep->cname); 5181 else 5182 (void) printf("%20s ", "NONE"); 5183 5184 (void) printf("%5u ", ext->ext_seq); 5185 5186 switch (ext->ext_type) { 5187 case EXTTYP_ALLOC: 5188 (void) printf("%7s ", "ALLOC"); 5189 break; 5190 case EXTTYP_FREE: 5191 (void) printf("%7s ", "FREE"); 5192 break; 5193 case EXTTYP_RESERVED: 5194 (void) printf("%7s ", "RESV"); 5195 break; 5196 case EXTTYP_END: 5197 (void) printf("%7s ", "END"); 5198 break; 5199 default: 5200 (void) printf("%7s ", "INVLD"); 5201 break; 5202 } 5203 5204 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5205 } 5206 5207 5208 /* 5209 * FUNCTION: meta_sp_checkseq() 5210 * INPUT: extlist - list of extents to be checked 5211 * OUTPUT: none 5212 * RETURNS: int - 0 - success, -1 - error 5213 * PURPOSE: check soft partition sequence numbers. this function assumes 5214 * that a list of extents representing 1 or more soft partitions 5215 * is passed in sorted in sequence number order. within a 5216 * single soft partition, there may not be any missing or 5217 * duplicate sequence numbers. 
5218 */ 5219 static int 5220 meta_sp_checkseq(sp_ext_node_t *extlist) 5221 { 5222 sp_ext_node_t *ext; 5223 5224 assert(extlist != NULL); 5225 5226 for (ext = extlist; 5227 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5228 ext = ext->ext_next) { 5229 if (ext->ext_next->ext_namep != NULL && 5230 strcmp(ext->ext_next->ext_namep->cname, 5231 ext->ext_namep->cname) != 0) 5232 continue; 5233 5234 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5235 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5236 "%s: sequence numbers are " 5237 "incorrect: %d should be %d\n"), 5238 ext->ext_next->ext_namep->cname, 5239 ext->ext_next->ext_seq, ext->ext_seq + 1); 5240 return (-1); 5241 } 5242 } 5243 return (0); 5244 } 5245 5246 5247 /* 5248 * FUNCTION: meta_sp_resolve_name_conflict() 5249 * INPUT: sp - name of set we're are recovering in. 5250 * old_np - name pointer of soft partition we found on disk. 5251 * OUTPUT: new_np - name pointer for new soft partition name. 5252 * ep - error pointer returned. 5253 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5254 * PURPOSE: Check to see if the name of one of the soft partitions we found 5255 * on disk already exists in the metadb. If so, prompt for a new 5256 * name. In addition, we keep a static array of names that 5257 * will be recovered from this device since these names don't 5258 * exist in the configuration at this point but cannot be 5259 * recovered more than once. 
5260 */ 5261 static int 5262 meta_sp_resolve_name_conflict( 5263 mdsetname_t *sp, 5264 mdname_t *old_np, 5265 mdname_t **new_np, 5266 md_error_t *ep 5267 ) 5268 { 5269 char yesno[255]; 5270 char *yes; 5271 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5272 int nunits; 5273 static int *used_names = NULL; 5274 5275 assert(old_np != NULL); 5276 5277 if (used_names == NULL) { 5278 if ((nunits = meta_get_nunits(ep)) < 0) 5279 return (-1); 5280 used_names = Zalloc(nunits * sizeof (int)); 5281 } 5282 5283 /* see if it exists already */ 5284 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5285 metagetmiscname(old_np, ep) == NULL) { 5286 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5287 return (-1); 5288 else { 5289 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5290 mdclrerror(ep); 5291 return (0); 5292 } 5293 } 5294 5295 /* name exists, ask the user for a new one */ 5296 (void) printf(dgettext(TEXT_DOMAIN, 5297 "WARNING: A soft partition named %s was found in the extent\n" 5298 "headers, but this name already exists in the metadb " 5299 "configuration.\n" 5300 "In order to continue recovery you must supply\n" 5301 "a new name for this soft partition.\n"), old_np->cname); 5302 (void) printf(dgettext(TEXT_DOMAIN, 5303 "Would you like to continue and supply a new name? 
(yes/no) ")); 5304 5305 (void) fflush(stdout); 5306 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5307 (strlen(yesno) == 1)) 5308 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5309 dgettext(TEXT_DOMAIN, "no")); 5310 yes = dgettext(TEXT_DOMAIN, "yes"); 5311 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5312 return (-1); 5313 } 5314 5315 (void) fflush(stdin); 5316 5317 /* get the new name */ 5318 for (;;) { 5319 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5320 "for this soft partition (dXXXX) ")); 5321 (void) fflush(stdout); 5322 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5323 (void) strcpy(newname, ""); 5324 5325 /* remove newline character */ 5326 if (newname[strlen(newname) - 1] == '\n') 5327 newname[strlen(newname) - 1] = '\0'; 5328 5329 if (!(is_metaname(newname)) || 5330 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5331 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5332 "Invalid metadevice name\n")); 5333 (void) fflush(stderr); 5334 continue; 5335 } 5336 5337 if ((*new_np = metaname(&sp, newname, 5338 META_DEVICE, ep)) == NULL) { 5339 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5340 "Invalid metadevice name\n")); 5341 (void) fflush(stderr); 5342 continue; 5343 } 5344 5345 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5346 /* make sure the name isn't already being used */ 5347 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5348 metagetmiscname(*new_np, ep) != NULL) { 5349 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5350 "That name already exists\n")); 5351 continue; 5352 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5353 return (-1); 5354 5355 break; 5356 } 5357 5358 /* got a new name, place in used array and return */ 5359 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5360 mdclrerror(ep); 5361 return (1); 5362 } 5363 5364 /* 5365 * FUNCTION: meta_sp_validate_wm() 5366 * INPUT: sp - set name we are recovering in 5367 * compnp - name pointer for device we are recovering from 5368 * options - metarecover options 5369 * OUTPUT: ep - error pointer returned 5370 * RETURNS: int - 0 - success, -1 - error 5371 * PURPOSE: validate and display watermark configuration. walk the 5372 * on-disk watermark structures and validate the information 5373 * found within. since a watermark configuration is 5374 * "self-defining", the act of traversing the watermarks 5375 * is part of the validation process. 5376 */ 5377 static int 5378 meta_sp_validate_wm( 5379 mdsetname_t *sp, 5380 mdname_t *compnp, 5381 mdcmdopts_t options, 5382 md_error_t *ep 5383 ) 5384 { 5385 sp_ext_node_t *extlist = NULL; 5386 sp_ext_node_t *ext; 5387 int num_sps = 0; 5388 int rval; 5389 5390 if ((options & MDCMD_VERBOSE) != 0) 5391 (void) printf(dgettext(TEXT_DOMAIN, 5392 "Verifying on-disk structures on %s.\n"), 5393 compnp->cname); 5394 5395 /* 5396 * for each watermark, build an ext_node, place on list. 
5397 */ 5398 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5399 meta_sp_cmp_by_nameseq, ep); 5400 5401 if ((options & MDCMD_VERBOSE) != 0) { 5402 /* print out what we found */ 5403 if (extlist == NULL) 5404 (void) printf(dgettext(TEXT_DOMAIN, 5405 "No extent headers found on %s.\n"), 5406 compnp->cname); 5407 else { 5408 (void) printf(dgettext(TEXT_DOMAIN, 5409 "The following extent headers were found on %s.\n"), 5410 compnp->cname); 5411 meta_sp_display_exthdr(); 5412 } 5413 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5414 meta_sp_display_ext(ext); 5415 } 5416 5417 if (rval < 0) { 5418 (void) printf(dgettext(TEXT_DOMAIN, 5419 "%s: On-disk structures invalid or " 5420 "no soft partitions found.\n"), 5421 compnp->cname); 5422 return (-1); 5423 } 5424 5425 assert(extlist != NULL); 5426 5427 /* count number of soft partitions */ 5428 for (ext = extlist; 5429 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5430 ext = ext->ext_next) { 5431 if (ext->ext_next != NULL && 5432 ext->ext_next->ext_namep != NULL && 5433 strcmp(ext->ext_next->ext_namep->cname, 5434 ext->ext_namep->cname) == 0) 5435 continue; 5436 num_sps++; 5437 } 5438 5439 if ((options & MDCMD_VERBOSE) != 0) 5440 (void) printf(dgettext(TEXT_DOMAIN, 5441 "Found %d soft partition(s) on %s.\n"), num_sps, 5442 compnp->cname); 5443 5444 if (num_sps == 0) { 5445 (void) printf(dgettext(TEXT_DOMAIN, 5446 "%s: No soft partitions.\n"), compnp->cname); 5447 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5448 } 5449 5450 /* check sequence numbers */ 5451 if ((options & MDCMD_VERBOSE) != 0) 5452 (void) printf(dgettext(TEXT_DOMAIN, 5453 "Checking sequence numbers.\n")); 5454 5455 if (meta_sp_checkseq(extlist) != 0) 5456 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5457 5458 return (0); 5459 } 5460 5461 /* 5462 * FUNCTION: meta_sp_validate_unit() 5463 * INPUT: sp - name of set we are recovering in 5464 * compnp - name of component we are recovering from 5465 * options - 
metarecover options 5466 * OUTPUT: ep - error pointer returned 5467 * RETURNS: int - 0 - success, -1 - error 5468 * PURPOSE: validate and display metadb configuration. begin by getting 5469 * all soft partitions built on the specified component. get 5470 * the unit structure for each one and validate the fields within. 5471 */ 5472 static int 5473 meta_sp_validate_unit( 5474 mdsetname_t *sp, 5475 mdname_t *compnp, 5476 mdcmdopts_t options, 5477 md_error_t *ep 5478 ) 5479 { 5480 md_sp_t *msp; 5481 mdnamelist_t *spnlp = NULL; 5482 mdnamelist_t *namep = NULL; 5483 int count; 5484 uint_t extn; 5485 sp_ext_length_t size; 5486 5487 if ((options & MDCMD_VERBOSE) != 0) 5488 (void) printf(dgettext(TEXT_DOMAIN, 5489 "%s: Validating soft partition metadb entries.\n"), 5490 compnp->cname); 5491 5492 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5493 return (-1); 5494 5495 /* get all soft partitions on component */ 5496 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5497 5498 if (count == 0) { 5499 (void) printf(dgettext(TEXT_DOMAIN, 5500 "%s: No soft partitions.\n"), compnp->cname); 5501 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5502 } else if (count < 0) { 5503 return (-1); 5504 } 5505 5506 /* Now go through the soft partitions and check each one */ 5507 for (namep = spnlp; namep != NULL; namep = namep->next) { 5508 mdname_t *curnp = namep->namep; 5509 sp_ext_offset_t curvoff; 5510 5511 /* get the unit structure */ 5512 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5513 return (-1); 5514 5515 /* verify generic unit structure parameters */ 5516 if ((options & MDCMD_VERBOSE) != 0) 5517 (void) printf(dgettext(TEXT_DOMAIN, 5518 "\nVerifying device %s.\n"), 5519 curnp->cname); 5520 5521 /* 5522 * MD_SP_LAST is an invalid state and is always the 5523 * highest numbered. 
5524 */ 5525 if (msp->status >= MD_SP_LAST) { 5526 (void) printf(dgettext(TEXT_DOMAIN, 5527 "%s: status value %u is out of range.\n"), 5528 curnp->cname, msp->status); 5529 return (mdmderror(ep, MDE_RECOVER_FAILED, 5530 0, curnp->cname)); 5531 } else if ((options & MDCMD_VERBOSE) != 0) { 5532 uint_t tstate = 0; 5533 5534 if (metaismeta(msp->compnamep)) { 5535 if (meta_get_tstate(msp->common.namep->dev, 5536 &tstate, ep) != 0) 5537 return (-1); 5538 } 5539 (void) printf(dgettext(TEXT_DOMAIN, 5540 "%s: Status \"%s\" is valid.\n"), 5541 curnp->cname, meta_sp_status_to_name(msp->status, 5542 tstate & MD_DEV_ERRORED)); 5543 } 5544 5545 /* Now verify each extent */ 5546 if ((options & MDCMD_VERBOSE) != 0) 5547 (void) printf("%14s %21s %21s %21s\n", 5548 dgettext(TEXT_DOMAIN, "Extent Number"), 5549 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5550 dgettext(TEXT_DOMAIN, "Physical Offset"), 5551 dgettext(TEXT_DOMAIN, "Length")); 5552 5553 curvoff = 0ULL; 5554 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5555 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5556 5557 if ((options & MDCMD_VERBOSE) != 0) 5558 (void) printf("%14u %21llu %21llu %21llu\n", 5559 extn, extp->voff, extp->poff, extp->len); 5560 5561 if (extp->voff != curvoff) { 5562 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5563 "%s: virtual offset for extent %u " 5564 "is inconsistent, expected %llu, " 5565 "got %llu.\n"), curnp->cname, extn, 5566 curvoff, extp->voff); 5567 return (mdmderror(ep, MDE_RECOVER_FAILED, 5568 0, compnp->cname)); 5569 } 5570 5571 /* make sure extent does not drop off the end */ 5572 if ((extp->poff + extp->len) == size) { 5573 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5574 "%s: extent %u at offset %llu, " 5575 "length %llu exceeds the size of the " 5576 "device, %llu.\n"), curnp->cname, 5577 extn, extp->poff, extp->len, size); 5578 return (mdmderror(ep, MDE_RECOVER_FAILED, 5579 0, compnp->cname)); 5580 } 5581 5582 curvoff += extp->len; 5583 } 5584 } 5585 if (options & MDCMD_PRINT) { 
5586 (void) printf(dgettext(TEXT_DOMAIN, 5587 "%s: Soft Partition metadb configuration is valid\n"), 5588 compnp->cname); 5589 } 5590 return (0); 5591 } 5592 5593 /* 5594 * FUNCTION: meta_sp_validate_wm_and_unit() 5595 * INPUT: sp - name of set we are recovering in 5596 * compnp - name of device we are recovering from 5597 * options - metarecover options 5598 * OUTPUT: ep - error pointer returned 5599 * RETURNS: int - 0 - success, -1 error 5600 * PURPOSE: cross-validate and display watermarks and metadb records. 5601 * get both the unit structures for the soft partitions built 5602 * on the specified component and the watermarks found on that 5603 * component and check to make sure they are consistent with 5604 * each other. 5605 */ 5606 static int 5607 meta_sp_validate_wm_and_unit( 5608 mdsetname_t *sp, 5609 mdname_t *np, 5610 mdcmdopts_t options, 5611 md_error_t *ep 5612 ) 5613 { 5614 sp_ext_node_t *wmlist = NULL; 5615 sp_ext_node_t *unitlist = NULL; 5616 sp_ext_node_t *unitext; 5617 sp_ext_node_t *wmext; 5618 sp_ext_offset_t tmpunitoff; 5619 mdnamelist_t *spnlp = NULL; 5620 int count; 5621 int rval = 0; 5622 int verbose = (options & MDCMD_VERBOSE); 5623 5624 /* get unit structure list */ 5625 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5626 if (count <= 0) 5627 return (-1); 5628 5629 meta_sp_list_insert(NULL, NULL, &unitlist, 5630 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5631 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5632 5633 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5634 metafreenamelist(spnlp); 5635 return (-1); 5636 } 5637 5638 metafreenamelist(spnlp); 5639 5640 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5641 5642 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5643 meta_sp_cmp_by_offset, ep) < 0) { 5644 meta_sp_list_free(&unitlist); 5645 return (-1); 5646 } 5647 5648 if (getenv(META_SP_DEBUG)) { 5649 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5650 
meta_sp_list_dump(unitlist); 5651 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5652 meta_sp_list_dump(wmlist); 5653 } 5654 5655 /* 5656 * step through both lists and compare allocated nodes. Free 5657 * nodes and end watermarks may differ between the two but 5658 * that's generally ok, and if they're wrong will typically 5659 * cause misplaced allocated extents. 5660 */ 5661 if (verbose) 5662 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5663 "allocations match extent headers.\n"), np->cname); 5664 5665 unitext = unitlist; 5666 wmext = wmlist; 5667 while ((wmext != NULL) && (unitext != NULL)) { 5668 /* find next allocated extents in each list */ 5669 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5670 wmext = wmext->ext_next; 5671 5672 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5673 unitext = unitext->ext_next; 5674 5675 if (wmext == NULL || unitext == NULL) 5676 break; 5677 5678 if (verbose) { 5679 (void) printf(dgettext(TEXT_DOMAIN, 5680 "Metadb extent:\n")); 5681 meta_sp_display_exthdr(); 5682 meta_sp_display_ext(unitext); 5683 (void) printf(dgettext(TEXT_DOMAIN, 5684 "Extent header extent:\n")); 5685 meta_sp_display_exthdr(); 5686 meta_sp_display_ext(wmext); 5687 (void) printf("\n"); 5688 } 5689 5690 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5691 rval = -1; 5692 5693 /* 5694 * if the offsets aren't equal, only increment the 5695 * lowest one in hopes of getting the lists back in sync. 5696 */ 5697 tmpunitoff = unitext->ext_offset; 5698 if (unitext->ext_offset <= wmext->ext_offset) 5699 unitext = unitext->ext_next; 5700 if (wmext->ext_offset <= tmpunitoff) 5701 wmext = wmext->ext_next; 5702 } 5703 5704 /* 5705 * if both lists aren't at the end then there are extra 5706 * allocated nodes in one of them. 
5707 */ 5708 if (wmext != NULL) { 5709 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5710 "%s: extent headers contain allocations not in " 5711 "the metadb\n\n"), np->cname); 5712 rval = -1; 5713 } 5714 5715 if (unitext != NULL) { 5716 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5717 "%s: metadb contains allocations not in the extent " 5718 "headers\n\n"), np->cname); 5719 rval = -1; 5720 } 5721 5722 if (options & MDCMD_PRINT) { 5723 if (rval == 0) { 5724 (void) printf(dgettext(TEXT_DOMAIN, 5725 "%s: Soft Partition metadb matches extent " 5726 "header configuration\n"), np->cname); 5727 } else { 5728 (void) printf(dgettext(TEXT_DOMAIN, 5729 "%s: Soft Partition metadb does not match extent " 5730 "header configuration\n"), np->cname); 5731 } 5732 } 5733 5734 return (rval); 5735 } 5736 5737 /* 5738 * FUNCTION: meta_sp_validate_exts() 5739 * INPUT: compnp - name pointer for device we are recovering from 5740 * wmext - extent node representing watermark 5741 * unitext - extent node from unit structure 5742 * OUTPUT: ep - return error pointer 5743 * RETURNS: int - 0 - succes, mdmderror return code - error 5744 * PURPOSE: Takes two extent nodes and checks them against each other. 5745 * offset, length, sequence number, set, and name are compared. 
5746 */ 5747 static int 5748 meta_sp_validate_exts( 5749 mdname_t *compnp, 5750 sp_ext_node_t *wmext, 5751 sp_ext_node_t *unitext, 5752 md_error_t *ep 5753 ) 5754 { 5755 if (wmext->ext_offset != unitext->ext_offset) { 5756 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5757 "%s: unit structure and extent header offsets differ.\n"), 5758 compnp->cname); 5759 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5760 } 5761 5762 if (wmext->ext_length != unitext->ext_length) { 5763 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5764 "%s: unit structure and extent header lengths differ.\n"), 5765 compnp->cname); 5766 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5767 } 5768 5769 if (wmext->ext_seq != unitext->ext_seq) { 5770 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5771 "%s: unit structure and extent header sequence numbers " 5772 "differ.\n"), compnp->cname); 5773 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5774 } 5775 5776 if (wmext->ext_type != unitext->ext_type) { 5777 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5778 "%s: unit structure and extent header types differ.\n"), 5779 compnp->cname); 5780 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5781 } 5782 5783 /* 5784 * If one has a set pointer and the other doesn't, error. 5785 * If both extents have setnames, then make sure they match 5786 * If both are NULL, it's ok, they match. 
5787 */ 5788 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5789 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5790 "%s: unit structure and extent header set values " 5791 "differ.\n"), compnp->cname); 5792 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5793 } 5794 5795 if (unitext->ext_setp != NULL) { 5796 if (strcmp(unitext->ext_setp->setname, 5797 wmext->ext_setp->setname) != 0) { 5798 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5799 "%s: unit structure and extent header set names " 5800 "differ.\n"), compnp->cname); 5801 return (mdmderror(ep, MDE_RECOVER_FAILED, 5802 0, compnp->cname)); 5803 } 5804 } 5805 5806 /* 5807 * If one has a name pointer and the other doesn't, error. 5808 * If both extents have names, then make sure they match 5809 * If both are NULL, it's ok, they match. 5810 */ 5811 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5812 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5813 "%s: unit structure and extent header name values " 5814 "differ.\n"), compnp->cname); 5815 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5816 } 5817 5818 if (unitext->ext_namep != NULL) { 5819 if (strcmp(wmext->ext_namep->cname, 5820 unitext->ext_namep->cname) != 0) { 5821 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5822 "%s: unit structure and extent header names " 5823 "differ.\n"), compnp->cname); 5824 return (mdmderror(ep, MDE_RECOVER_FAILED, 5825 0, compnp->cname)); 5826 } 5827 } 5828 5829 return (0); 5830 } 5831 5832 /* 5833 * FUNCTION: update_sp_status() 5834 * INPUT: sp - name of set we are recovering in 5835 * minors - pointer to an array of soft partition minor numbers 5836 * num_sps - number of minor numbers in array 5837 * status - new status to be applied to all soft parts in array 5838 * mn_set - set if current set is a multi-node set 5839 * OUTPUT: ep - return error pointer 5840 * RETURNS: int - 0 - success, -1 - error 5841 * PURPOSE: update status of soft partitions to new status. 
minors is an 5842 * array of minor numbers to apply the new status to. 5843 * If mn_set is set, a message is sent to all nodes in the 5844 * cluster to update the status locally. 5845 */ 5846 static int 5847 update_sp_status( 5848 mdsetname_t *sp, 5849 minor_t *minors, 5850 int num_sps, 5851 sp_status_t status, 5852 bool_t mn_set, 5853 md_error_t *ep 5854 ) 5855 { 5856 int i; 5857 int err = 0; 5858 5859 if (mn_set) { 5860 md_mn_msg_sp_setstat_t sp_setstat_params; 5861 int result; 5862 md_mn_result_t *resp = NULL; 5863 5864 for (i = 0; i < num_sps; i++) { 5865 sp_setstat_params.sp_setstat_mnum = minors[i]; 5866 sp_setstat_params.sp_setstat_status = status; 5867 5868 result = mdmn_send_message(sp->setno, 5869 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 5870 (char *)&sp_setstat_params, 5871 sizeof (sp_setstat_params), 5872 &resp, ep); 5873 if (resp != NULL) { 5874 if (resp->mmr_exitval != 0) 5875 err = -1; 5876 free_result(resp); 5877 } 5878 if (result != 0) { 5879 err = -1; 5880 } 5881 } 5882 } else { 5883 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5884 err = -1; 5885 } 5886 if (err < 0) { 5887 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5888 "Error updating status on recovered soft " 5889 "partitions.\n")); 5890 } 5891 return (err); 5892 } 5893 5894 /* 5895 * FUNCTION: meta_sp_recover_from_wm() 5896 * INPUT: sp - name of set we are recovering in 5897 * compnp - name pointer for component we are recovering from 5898 * options - metarecover options 5899 * OUTPUT: ep - return error pointer 5900 * RETURNS: int - 0 - success, -1 - error 5901 * PURPOSE: update metadb records to match watermarks. begin by getting 5902 * an extlist representing all soft partitions on the component. 5903 * then build a unit structure for each soft partition. 5904 * notify user of changes, then commit each soft partition to 5905 * the metadb one at a time in the "recovering" state. 
update 5906 * any watermarks that may need it (to reflect possible name 5907 * changes), and, finally, set the status of all recovered 5908 * partitions to the "OK" state at once. 5909 */ 5910 static int 5911 meta_sp_recover_from_wm( 5912 mdsetname_t *sp, 5913 mdname_t *compnp, 5914 mdcmdopts_t options, 5915 md_error_t *ep 5916 ) 5917 { 5918 sp_ext_node_t *extlist = NULL; 5919 sp_ext_node_t *sp_list = NULL; 5920 sp_ext_node_t *update_list = NULL; 5921 sp_ext_node_t *ext; 5922 sp_ext_node_t *sp_ext; 5923 mp_unit_t *mp; 5924 mp_unit_t **un_array; 5925 int numexts = 0, num_sps = 0, i = 0; 5926 int err = 0; 5927 int not_recovered = 0; 5928 int committed = 0; 5929 sp_ext_length_t sp_length = 0LL; 5930 mdnamelist_t *keynlp = NULL; 5931 mdname_t *np; 5932 mdname_t *new_np; 5933 int new_name; 5934 md_set_params_t set_params; 5935 minor_t *minors = NULL; 5936 char yesno[255]; 5937 char *yes; 5938 bool_t mn_set = 0; 5939 md_set_desc *sd; 5940 mm_unit_t *mm; 5941 md_set_mmown_params_t *ownpar = NULL; 5942 int comp_is_mirror = 0; 5943 5944 /* 5945 * if this component appears in another metadevice already, do 5946 * NOT recover from it. 5947 */ 5948 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5949 return (-1); 5950 5951 /* set flag if dealing with a MN set */ 5952 if (!metaislocalset(sp)) { 5953 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5954 return (-1); 5955 } 5956 if (MD_MNSET_DESC(sd)) 5957 mn_set = 1; 5958 } 5959 /* 5960 * for each watermark, build an ext_node, place on list. 
5961 */ 5962 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5963 meta_sp_cmp_by_nameseq, ep) < 0) 5964 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5965 5966 assert(extlist != NULL); 5967 5968 /* count number of soft partitions */ 5969 for (ext = extlist; 5970 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5971 ext = ext->ext_next) { 5972 if (ext->ext_next != NULL && 5973 ext->ext_next->ext_namep != NULL && 5974 strcmp(ext->ext_next->ext_namep->cname, 5975 ext->ext_namep->cname) == 0) 5976 continue; 5977 num_sps++; 5978 } 5979 5980 /* allocate array of unit structure pointers */ 5981 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5982 5983 /* 5984 * build unit structures from list of ext_nodes. 5985 */ 5986 for (ext = extlist; 5987 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5988 ext = ext->ext_next) { 5989 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5990 &sp_list, ext->ext_offset, ext->ext_length, 5991 ext->ext_type, ext->ext_seq, ext->ext_flags, 5992 meta_sp_cmp_by_nameseq); 5993 5994 numexts++; 5995 sp_length += ext->ext_length - MD_SP_WMSIZE; 5996 5997 if (ext->ext_next != NULL && 5998 ext->ext_next->ext_namep != NULL && 5999 strcmp(ext->ext_next->ext_namep->cname, 6000 ext->ext_namep->cname) == 0) 6001 continue; 6002 6003 /* 6004 * if we made it here, we are at a soft partition 6005 * boundary in the list. 6006 */ 6007 if (getenv(META_SP_DEBUG)) { 6008 meta_sp_debug("meta_recover_from_wm: dumping wm " 6009 "list:\n"); 6010 meta_sp_list_dump(sp_list); 6011 } 6012 6013 assert(sp_list != NULL); 6014 assert(sp_list->ext_namep != NULL); 6015 6016 if ((new_name = meta_sp_resolve_name_conflict(sp, 6017 sp_list->ext_namep, &new_np, ep)) < 0) { 6018 err = 1; 6019 goto out; 6020 } else if (new_name) { 6021 for (sp_ext = sp_list; 6022 sp_ext != NULL; 6023 sp_ext = sp_ext->ext_next) { 6024 /* 6025 * insert into the update list for 6026 * watermark update. 
6027 */ 6028 meta_sp_list_insert(sp_ext->ext_setp, 6029 new_np, &update_list, sp_ext->ext_offset, 6030 sp_ext->ext_length, sp_ext->ext_type, 6031 sp_ext->ext_seq, EXTFLG_UPDATE, 6032 meta_sp_cmp_by_offset); 6033 } 6034 6035 } 6036 if (options & MDCMD_DOIT) { 6037 /* store name in namespace */ 6038 if (mn_set) { 6039 /* send message to all nodes to return key */ 6040 md_mn_msg_addkeyname_t *send_params; 6041 int result; 6042 md_mn_result_t *resp = NULL; 6043 int message_size; 6044 6045 message_size = sizeof (*send_params) + 6046 strlen(compnp->cname) + 1; 6047 send_params = Zalloc(message_size); 6048 send_params->addkeyname_setno = sp->setno; 6049 (void) strcpy(&send_params->addkeyname_name[0], 6050 compnp->cname); 6051 result = mdmn_send_message(sp->setno, 6052 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6053 (char *)send_params, message_size, &resp, 6054 ep); 6055 Free(send_params); 6056 if (resp != NULL) { 6057 if (resp->mmr_exitval >= 0) { 6058 compnp->key = 6059 (mdkey_t)resp->mmr_exitval; 6060 } else { 6061 err = 1; 6062 free_result(resp); 6063 goto out; 6064 } 6065 free_result(resp); 6066 } 6067 if (result != 0) { 6068 err = 1; 6069 goto out; 6070 } 6071 (void) metanamelist_append(&keynlp, compnp); 6072 } else { 6073 if (add_key_name(sp, compnp, &keynlp, 6074 ep) != 0) { 6075 err = 1; 6076 goto out; 6077 } 6078 } 6079 } 6080 6081 /* create the unit structure */ 6082 if ((mp = meta_sp_createunit( 6083 (new_name) ? 
new_np : sp_list->ext_namep, compnp, 6084 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6085 err = 1; 6086 goto out; 6087 } 6088 6089 if (getenv(META_SP_DEBUG)) { 6090 meta_sp_debug("meta_sp_recover_from_wm: " 6091 "printing newly created unit structure"); 6092 meta_sp_printunit(mp); 6093 } 6094 6095 /* place in unit structure array */ 6096 un_array[i++] = mp; 6097 6098 /* free sp_list */ 6099 meta_sp_list_free(&sp_list); 6100 sp_list = NULL; 6101 numexts = 0; 6102 sp_length = 0LL; 6103 } 6104 6105 /* display configuration updates */ 6106 (void) printf(dgettext(TEXT_DOMAIN, 6107 "The following soft partitions were found and will be added to\n" 6108 "your metadevice configuration.\n")); 6109 (void) printf("%5s %15s %18s\n", 6110 dgettext(TEXT_DOMAIN, "Name"), 6111 dgettext(TEXT_DOMAIN, "Size"), 6112 dgettext(TEXT_DOMAIN, "No. of Extents")); 6113 for (i = 0; i < num_sps; i++) { 6114 (void) printf("%5s%lu %15llu %9d\n", "d", 6115 MD_MIN2UNIT(MD_SID(un_array[i])), 6116 un_array[i]->un_length, un_array[i]->un_numexts); 6117 } 6118 6119 if (!(options & MDCMD_DOIT)) { 6120 not_recovered = 1; 6121 goto out; 6122 } 6123 6124 /* ask user for confirmation */ 6125 (void) printf(dgettext(TEXT_DOMAIN, 6126 "WARNING: You are about to add one or more soft partition\n" 6127 "metadevices to your metadevice configuration. If there\n" 6128 "appears to be an error in the soft partition(s) displayed\n" 6129 "above, do NOT proceed with this recovery operation.\n")); 6130 (void) printf(dgettext(TEXT_DOMAIN, 6131 "Are you sure you want to do this (yes/no)? 
")); 6132 6133 (void) fflush(stdout); 6134 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6135 (strlen(yesno) == 1)) 6136 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6137 dgettext(TEXT_DOMAIN, "no")); 6138 yes = dgettext(TEXT_DOMAIN, "yes"); 6139 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6140 not_recovered = 1; 6141 goto out; 6142 } 6143 6144 /* commit records one at a time */ 6145 for (i = 0; i < num_sps; i++) { 6146 (void) memset(&set_params, 0, sizeof (set_params)); 6147 set_params.mnum = MD_SID(un_array[i]); 6148 set_params.size = (un_array[i])->c.un_size; 6149 set_params.mdp = (uintptr_t)(un_array[i]); 6150 set_params.options = 6151 meta_check_devicesize(un_array[i]->un_length); 6152 if (set_params.options == MD_CRO_64BIT) { 6153 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6154 } else { 6155 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6156 } 6157 MD_SETDRIVERNAME(&set_params, MD_SP, 6158 MD_MIN2SET(set_params.mnum)); 6159 6160 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6161 6162 /* 6163 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6164 */ 6165 if (mn_set) { 6166 md_mn_msg_iocset_t send_params; 6167 int result; 6168 md_mn_result_t *resp = NULL; 6169 int mess_size; 6170 6171 /* 6172 * Calculate message size. 
md_mn_msg_iocset_t only 6173 * contains one extent, so increment the size to 6174 * include all extents 6175 */ 6176 mess_size = sizeof (send_params) - 6177 sizeof (mp_ext_t) + 6178 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6179 6180 send_params.iocset_params = set_params; 6181 (void) memcpy(&send_params.unit, un_array[i], 6182 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6183 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6184 result = mdmn_send_message(sp->setno, 6185 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 6186 (char *)&send_params, mess_size, &resp, 6187 ep); 6188 if (resp != NULL) { 6189 if (resp->mmr_exitval != 0) 6190 err = 1; 6191 free_result(resp); 6192 } 6193 if (result != 0) { 6194 err = 1; 6195 } 6196 } else { 6197 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6198 np->cname) != 0) { 6199 err = 1; 6200 } 6201 } 6202 6203 if (err == 1) { 6204 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6205 "%s: Error committing record to metadb.\n"), 6206 np->cname); 6207 goto out; 6208 } 6209 6210 /* note that we've committed a record */ 6211 if (!committed) 6212 committed = 1; 6213 6214 /* update any watermarks that need it */ 6215 if (update_list != NULL) { 6216 md_sp_t *msp; 6217 6218 /* 6219 * Check to see if we're trying to create a partition 6220 * on a mirror. If so we may have to enforce an 6221 * ownership change before writing the watermark out. 
6222 */ 6223 if (metaismeta(compnp)) { 6224 char *miscname; 6225 6226 miscname = metagetmiscname(compnp, ep); 6227 if (miscname != NULL) 6228 comp_is_mirror = (strcmp(miscname, 6229 MD_MIRROR) == 0); 6230 else 6231 comp_is_mirror = 0; 6232 } 6233 /* 6234 * If this is a MN set and the component is a mirror, 6235 * change ownership to this node in order to write the 6236 * watermarks 6237 */ 6238 if (mn_set && comp_is_mirror) { 6239 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6240 if (mm == NULL) { 6241 err = 1; 6242 goto out; 6243 } else { 6244 err = meta_mn_change_owner(&ownpar, 6245 sp->setno, 6246 meta_getminor(compnp->dev), 6247 sd->sd_mn_mynode->nd_nodeid, 6248 MD_MN_MM_PREVENT_CHANGE | 6249 MD_MN_MM_SPAWN_THREAD); 6250 if (err != 0) 6251 goto out; 6252 } 6253 } 6254 6255 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6256 err = 1; 6257 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6258 "%s: Error updating extent headers.\n"), 6259 np->cname); 6260 goto out; 6261 } 6262 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6263 err = 1; 6264 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6265 "%s: Error updating extent headers " 6266 "on disk.\n"), np->cname); 6267 goto out; 6268 } 6269 } 6270 /* 6271 * If we have changed ownership earlier and prevented any 6272 * ownership changes, we can now allow ownership changes 6273 * again. 
6274 */ 6275 if (ownpar) { 6276 (void) meta_mn_change_owner(&ownpar, sp->setno, 6277 ownpar->d.mnum, 6278 ownpar->d.owner, 6279 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6280 } 6281 } 6282 6283 /* update status of all soft partitions to OK */ 6284 minors = Zalloc(num_sps * sizeof (minor_t)); 6285 for (i = 0; i < num_sps; i++) 6286 minors[i] = MD_SID(un_array[i]); 6287 6288 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6289 if (err != 0) 6290 goto out; 6291 6292 if (options & MDCMD_PRINT) 6293 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6294 "Soft Partitions recovered from device.\n"), 6295 compnp->cname); 6296 out: 6297 /* free memory */ 6298 if (extlist != NULL) 6299 meta_sp_list_free(&extlist); 6300 if (sp_list != NULL) 6301 meta_sp_list_free(&sp_list); 6302 if (update_list != NULL) 6303 meta_sp_list_free(&update_list); 6304 if (un_array != NULL) { 6305 for (i = 0; i < num_sps; i++) 6306 Free(un_array[i]); 6307 Free(un_array); 6308 } 6309 if (minors != NULL) 6310 Free(minors); 6311 if (ownpar != NULL) 6312 Free(ownpar); 6313 (void) fflush(stdout); 6314 6315 if ((keynlp != NULL) && (committed != 1)) { 6316 /* 6317 * if we haven't committed any softparts, either because of an 6318 * error or because the user decided not to proceed, delete 6319 * namelist key for the component 6320 */ 6321 if (mn_set) { 6322 mdnamelist_t *p; 6323 6324 for (p = keynlp; (p != NULL); p = p->next) { 6325 mdname_t *np = p->namep; 6326 md_mn_msg_delkeyname_t send_params; 6327 md_mn_result_t *resp = NULL; 6328 6329 send_params.delkeyname_dev = np->dev; 6330 send_params.delkeyname_setno = sp->setno; 6331 send_params.delkeyname_key = np->key; 6332 (void) mdmn_send_message(sp->setno, 6333 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6334 (char *)&send_params, sizeof (send_params), 6335 &resp, ep); 6336 if (resp != NULL) { 6337 free_result(resp); 6338 } 6339 } 6340 } else { 6341 (void) del_key_names(sp, keynlp, NULL); 6342 } 6343 } 6344 6345 metafreenamelist(keynlp); 
6346 6347 if (err) 6348 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6349 6350 if (not_recovered) 6351 if (options & MDCMD_PRINT) 6352 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6353 "Soft Partitions NOT recovered from device.\n"), 6354 compnp->cname); 6355 return (0); 6356 } 6357 6358 /* 6359 * FUNCTION: meta_sp_recover_from_unit() 6360 * INPUT: sp - name of set we are recovering in 6361 * compnp - name of component we are recovering from 6362 * options - metarecover options 6363 * OUTPUT: ep - return error pointer 6364 * RETURNS: int - 0 - success, -1 - error 6365 * PURPOSE: update watermarks to match metadb records. begin by getting 6366 * a namelist representing all soft partitions on the specified 6367 * component. then, build an extlist representing the soft 6368 * partitions, filling in the freespace extents. notify user 6369 * of changes, place all soft partitions into the "recovering" 6370 * state and update the watermarks. finally, return all soft 6371 * partitions to the "OK" state. 
6372 */ 6373 static int 6374 meta_sp_recover_from_unit( 6375 mdsetname_t *sp, 6376 mdname_t *compnp, 6377 mdcmdopts_t options, 6378 md_error_t *ep 6379 ) 6380 { 6381 mdnamelist_t *spnlp = NULL; 6382 mdnamelist_t *nlp = NULL; 6383 sp_ext_node_t *ext = NULL; 6384 sp_ext_node_t *extlist = NULL; 6385 int count; 6386 char yesno[255]; 6387 char *yes; 6388 int rval = 0; 6389 minor_t *minors = NULL; 6390 int i; 6391 md_sp_t *msp; 6392 md_set_desc *sd; 6393 bool_t mn_set = 0; 6394 daddr_t start_block; 6395 6396 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6397 if (count <= 0) 6398 return (-1); 6399 6400 /* set flag if dealing with a MN set */ 6401 if (!metaislocalset(sp)) { 6402 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6403 return (-1); 6404 } 6405 if (MD_MNSET_DESC(sd)) 6406 mn_set = 1; 6407 } 6408 /* 6409 * Save the XDR unit structure for one of the soft partitions; 6410 * we'll use this later to provide metadevice context to 6411 * update the watermarks so the device can be resolved by 6412 * devid instead of dev_t. 
6413 */ 6414 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6415 metafreenamelist(spnlp); 6416 return (-1); 6417 } 6418 6419 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6420 MD_DISKADDR_ERROR) { 6421 return (-1); 6422 } 6423 6424 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6425 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6426 meta_sp_list_insert(NULL, NULL, &extlist, 6427 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6428 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6429 6430 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6431 metafreenamelist(spnlp); 6432 return (-1); 6433 } 6434 6435 assert(extlist != NULL); 6436 if ((options & MDCMD_VERBOSE) != 0) { 6437 (void) printf(dgettext(TEXT_DOMAIN, 6438 "Updating extent headers on device %s from metadb.\n\n"), 6439 compnp->cname); 6440 (void) printf(dgettext(TEXT_DOMAIN, 6441 "The following extent headers will be written:\n")); 6442 meta_sp_display_exthdr(); 6443 } 6444 6445 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6446 6447 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6448 6449 /* mark every node for updating except the reserved space */ 6450 if (ext->ext_type != EXTTYP_RESERVED) { 6451 ext->ext_flags |= EXTFLG_UPDATE; 6452 6453 /* print extent information */ 6454 if ((options & MDCMD_VERBOSE) != 0) 6455 meta_sp_display_ext(ext); 6456 } 6457 } 6458 6459 /* request verification and then update all watermarks */ 6460 if ((options & MDCMD_DOIT) != 0) { 6461 6462 (void) printf(dgettext(TEXT_DOMAIN, 6463 "\nWARNING: You are about to overwrite portions of %s\n" 6464 "with soft partition metadata. The extent headers will be\n" 6465 "written to match the existing metadb configuration. If\n" 6466 "the device was not previously setup with this\n" 6467 "configuration, data loss may result.\n\n"), 6468 compnp->cname); 6469 (void) printf(dgettext(TEXT_DOMAIN, 6470 "Are you sure you want to do this (yes/no)? 
")); 6471 6472 (void) fflush(stdout); 6473 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6474 (strlen(yesno) == 1)) 6475 (void) snprintf(yesno, sizeof (yesno), 6476 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6477 yes = dgettext(TEXT_DOMAIN, "yes"); 6478 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6479 /* place soft partitions into recovering state */ 6480 minors = Zalloc(count * sizeof (minor_t)); 6481 for (nlp = spnlp, i = 0; 6482 nlp != NULL && i < count; 6483 nlp = nlp->next, i++) { 6484 assert(nlp->namep != NULL); 6485 minors[i] = meta_getminor(nlp->namep->dev); 6486 } 6487 if (update_sp_status(sp, minors, count, 6488 MD_SP_RECOVER, mn_set, ep) != 0) { 6489 rval = -1; 6490 goto out; 6491 } 6492 6493 /* update the watermarks */ 6494 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6495 rval = -1; 6496 goto out; 6497 } 6498 6499 if (options & MDCMD_PRINT) { 6500 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6501 "Soft Partitions recovered from metadb\n"), 6502 compnp->cname); 6503 } 6504 6505 /* return soft partitions to the OK state */ 6506 if (update_sp_status(sp, minors, count, 6507 MD_SP_OK, mn_set, ep) != 0) { 6508 rval = -1; 6509 goto out; 6510 } 6511 6512 rval = 0; 6513 goto out; 6514 } 6515 } 6516 6517 if (options & MDCMD_PRINT) { 6518 (void) printf(dgettext(TEXT_DOMAIN, 6519 "%s: Soft Partitions NOT recovered from metadb\n"), 6520 compnp->cname); 6521 } 6522 6523 out: 6524 if (minors != NULL) 6525 Free(minors); 6526 metafreenamelist(spnlp); 6527 meta_sp_list_free(&extlist); 6528 (void) fflush(stdout); 6529 return (rval); 6530 } 6531 6532 6533 /* 6534 * FUNCTION: meta_sp_update_abr() 6535 * INPUT: sp - name of set we are recovering in 6536 * OUTPUT: ep - return error pointer 6537 * RETURNS: int - 0 - success, -1 - error 6538 * PURPOSE: update the ABR state for all soft partitions in the set. This 6539 * is called when joining a set. 
It sends a message to the master 6540 * node for each soft partition to get the value of tstate and 6541 * then sets ABR ,if required, by opening the sp, setting ABR 6542 * and then closing the sp. This approach is taken rather that 6543 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6544 * the case when we have another node simultaneously unsetting ABR. 6545 */ 6546 int 6547 meta_sp_update_abr( 6548 mdsetname_t *sp, 6549 md_error_t *ep 6550 ) 6551 { 6552 mdnamelist_t *devnlp = NULL; 6553 mdnamelist_t *p; 6554 mdname_t *devnp = NULL; 6555 md_unit_t *un; 6556 char fname[MAXPATHLEN]; 6557 int mnum, fd; 6558 volcap_t vc; 6559 uint_t tstate; 6560 6561 6562 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6563 return (-1); 6564 } 6565 6566 /* Exit if no soft partitions in this set */ 6567 if (devnlp == NULL) 6568 return (0); 6569 6570 /* For each soft partition */ 6571 for (p = devnlp; (p != NULL); p = p->next) { 6572 devnp = p->namep; 6573 6574 /* check if this is a top level metadevice */ 6575 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6576 goto out; 6577 if (MD_HAS_PARENT(MD_PARENT(un))) { 6578 Free(un); 6579 continue; 6580 } 6581 Free(un); 6582 6583 /* Get tstate from Master */ 6584 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6585 mdname_t *np; 6586 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6587 ep); 6588 if (np) { 6589 md_perror(dgettext(TEXT_DOMAIN, 6590 "Unable to get tstate for %s"), np->cname); 6591 } 6592 continue; 6593 } 6594 /* If not set on the master, nothing to do */ 6595 if (!(tstate & MD_ABR_CAP)) 6596 continue; 6597 6598 mnum = meta_getminor(devnp->dev); 6599 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6600 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6601 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6602 md_perror(dgettext(TEXT_DOMAIN, 6603 "Could not open device %s"), fname); 6604 continue; 6605 } 6606 6607 /* Set ABR state */ 6608 vc.vc_info = 0; 6609 vc.vc_set = 0; 6610 if (ioctl(fd, 
DKIOCGETVOLCAP, &vc) < 0) { 6611 (void) close(fd); 6612 continue; 6613 } 6614 6615 vc.vc_set = DKV_ABR_CAP; 6616 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6617 (void) close(fd); 6618 goto out; 6619 } 6620 6621 (void) close(fd); 6622 } 6623 metafreenamelist(devnlp); 6624 return (0); 6625 out: 6626 metafreenamelist(devnlp); 6627 return (-1); 6628 } 6629 6630 /* 6631 * FUNCTION: meta_mn_sp_update_abr() 6632 * INPUT: arg - Given set. 6633 * PURPOSE: update the ABR state for all soft partitions in the set by 6634 * forking a process to call meta_sp_update_abr() 6635 * This function is only called via rpc.metad when adding a node 6636 * to a set, ie this node is beong joined to the set by another 6637 * node. 6638 */ 6639 void * 6640 meta_mn_sp_update_abr(void *arg) 6641 { 6642 set_t setno = *((set_t *)arg); 6643 mdsetname_t *sp; 6644 md_error_t mde = mdnullerror; 6645 int fval; 6646 6647 /* should have a set */ 6648 assert(setno != NULL); 6649 6650 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6651 mde_perror(&mde, ""); 6652 return (NULL); 6653 } 6654 6655 if (!(meta_is_mn_set(sp, &mde))) { 6656 mde_perror(&mde, ""); 6657 return (NULL); 6658 } 6659 6660 /* fork a process */ 6661 if ((fval = md_daemonize(sp, &mde)) != 0) { 6662 /* 6663 * md_daemonize will fork off a process. The is the 6664 * parent or error. 6665 */ 6666 if (fval > 0) { 6667 return (NULL); 6668 } 6669 mde_perror(&mde, ""); 6670 return (NULL); 6671 } 6672 /* 6673 * Child process should never return back to rpc.metad, but 6674 * should exit. 6675 * Flush all internally cached data inherited from parent process 6676 * since cached data will be cleared when parent process RPC request 6677 * has completed (which is possibly before this child process 6678 * can complete). 6679 * Child process can retrieve and cache its own copy of data from 6680 * rpc.metad that won't be changed by the parent process. 
6681 * 6682 * Reset md_in_daemon since this child will be a client of rpc.metad 6683 * not part of the rpc.metad daemon itself. 6684 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6685 * this thread is rpc.metad or any other thread. (If this thread 6686 * was rpc.metad it could use some short circuit code to get data 6687 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6688 */ 6689 md_in_daemon = 0; 6690 metaflushsetname(sp); 6691 sr_cache_flush_setno(setno); 6692 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6693 mde_perror(&mde, ""); 6694 md_exit(sp, 1); 6695 } 6696 6697 6698 /* 6699 * Closing stdin/out/err here. 6700 */ 6701 (void) close(0); 6702 (void) close(1); 6703 (void) close(2); 6704 assert(fval == 0); 6705 6706 (void) meta_sp_update_abr(sp, &mde); 6707 6708 md_exit(sp, 0); 6709 /*NOTREACHED*/ 6710 return (NULL); 6711 } 6712