1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Just in case we're not in a build environment, make sure that 30 * TEXT_DOMAIN gets set to something. 31 */ 32 #if !defined(TEXT_DOMAIN) 33 #define TEXT_DOMAIN "SYS_TEST" 34 #endif 35 36 /* 37 * soft partition operations 38 * 39 * Soft Partitions provide a virtual disk mechanism which is used to 40 * divide a large volume into many small pieces, each appearing as a 41 * separate device. A soft partition consists of a series of extents, 42 * each having an offset and a length. The extents are logically 43 * contiguous, so where the first extent leaves off the second extent 44 * picks up. Which extent a given "virtual offset" belongs to is 45 * dependent on the size of all the previous extents in the soft 46 * partition. 47 * 48 * Soft partitions are represented in memory by an extent node 49 * (sp_ext_node_t) which contains all of the information necessary to 50 * create a unit structure and update the on-disk format, called 51 * "watermarks". These extent nodes are typically kept in a doubly 52 * linked list and are manipulated by list manipulation routines. A 53 * list of extents may represent all of the soft partitions on a volume, 54 * a single soft partition, or perhaps just a set of extents that need 55 * to be updated. Extent lists may be sorted by extent or by name/seq#, 56 * depending on which compare function is used. Most of the routines 57 * require the list be sorted by offset to work, and that's the typical 58 * configuration. 59 * 60 * In order to do an allocation, knowledge of all soft partitions on the 61 * volume is required. Then free space is determined from the space 62 * that is not allocated, and new allocations can be made from the free 63 * space. Once the new allocations are made, a unit structure is created 64 * and the watermarks are updated. The status is then changed to "okay" 65 * on the unit structure to commit the transaction. If updating the 66 * watermarks fails, the unit structure is in an intermediate state and 67 * the driver will not allow access to the device. 68 * 69 * A typical sequence of events is: 70 * 1. Fetch the list of names for all soft partitions on a volume 71 * meta_sp_get_by_component() 72 * 2. Construct an extent list from the name list 73 * meta_sp_extlist_from_namelist() 74 * 3. Fill the gaps in the extent list with free extents 75 * meta_sp_list_freefill() 76 * 4. Allocate from the free extents 77 * meta_sp_alloc_by_len() 78 * meta_sp_alloc_by_list() 79 * 5. Create the unit structure from the extent list 80 * meta_sp_createunit() 81 * meta_sp_updateunit() 82 * 6. Write out the watermarks 83 * meta_sp_update_wm() 84 * 7. Set the status to "Okay" 85 * meta_sp_setstatus() 86 * 87 */ 88 89 #include <stdio.h> 90 #include <meta.h> 91 #include "meta_repartition.h" 92 #include <sys/lvm/md_sp.h> 93 #include <sys/lvm/md_crc.h> 94 #include <strings.h> 95 #include <sys/lvm/md_mirror.h> 96 #include <sys/bitmap.h> 97 98 extern int md_in_daemon; 99 100 typedef struct sp_ext_node { 101 struct sp_ext_node *ext_next; /* next element */ 102 struct sp_ext_node *ext_prev; /* previous element */ 103 sp_ext_type_t ext_type; /* type of extent */ 104 sp_ext_offset_t ext_offset; /* starting offset */ 105 sp_ext_length_t ext_length; /* length of this node */ 106 uint_t ext_flags; /* extent flags */ 107 uint32_t ext_seq; /* watermark seq no */ 108 mdname_t *ext_namep; /* name pointer */ 109 mdsetname_t *ext_setp; /* set pointer */ 110 } sp_ext_node_t; 111 112 /* extent flags */ 113 #define EXTFLG_UPDATE (1) 114 115 /* Extent node compare function for list sorting */ 116 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 117 118 119 /* Function Prototypes */ 120 121 /* Debugging Functions */ 122 static void meta_sp_debug(char *format, ...); 123 static void meta_sp_printunit(mp_unit_t *mp); 124 125 /* Misc Support Functions */ 126 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 127 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 128 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 129 md_error_t *ep); 130 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 131 mdnamelist_t **nlpp, int force, md_error_t *ep); 132 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 133 mdname_t *compnp, md_error_t *ep); 134 135 /* Extent List Manipulation Functions */ 136 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 137 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 138 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 139 sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length, 140 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 141 static void meta_sp_list_free(sp_ext_node_t **head); 142 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 143 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 144 sp_ext_type_t exttype, int exclude_wm); 145 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 146 sp_ext_offset_t offset); 147 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 148 sp_ext_length_t size); 149 static void meta_sp_list_dump(sp_ext_node_t *head); 150 static int meta_sp_list_overlaps(sp_ext_node_t *head); 151 152 /* Extent List Query Functions */ 153 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 154 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 155 sp_ext_length_t alignment); 156 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 157 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 158 md_error_t *ep); 159 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 160 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 161 162 163 /* Extent Allocation Functions */ 164 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 165 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 166 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 167 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 168 sp_ext_node_t **extlist, sp_ext_length_t *lp, 169 sp_ext_offset_t last_off, sp_ext_length_t alignment); 170 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 171 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 172 173 /* Extent List Population Functions */ 174 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 175 sp_ext_node_t **extlist, md_error_t *ep); 176 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 177 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep); 178 179 /* Print (metastat) Functions */ 180 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 181 mdprtopts_t options, md_error_t *ep); 182 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 183 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 184 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 185 186 /* Watermark Manipulation Functions */ 187 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 188 sp_ext_node_t *extlist, md_error_t *ep); 189 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 190 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 191 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 192 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 193 md_error_t *ep); 194 195 /* Unit Structure Manipulation Functions */ 196 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 197 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 198 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 199 sp_status_t status, md_error_t *ep); 200 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 201 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 202 md_error_t *ep); 203 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 204 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 205 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 206 int *repart_options, md_error_t *ep); 207 208 /* Reset (metaclear) Functions */ 209 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 210 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 211 212 /* Recovery (metarecover) Functions */ 213 static void meta_sp_display_exthdr(void); 214 static void meta_sp_display_ext(sp_ext_node_t *ext); 215 static int meta_sp_checkseq(sp_ext_node_t *extlist); 216 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 217 mdname_t **, md_error_t *); 218 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 219 mdcmdopts_t options, md_error_t *ep); 220 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 221 mdcmdopts_t options, md_error_t *ep); 222 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 223 mdcmdopts_t options, md_error_t *ep); 224 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 225 sp_ext_node_t *unitext, md_error_t *ep); 226 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 227 mdcmdopts_t options, md_error_t *ep); 228 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 229 mdcmdopts_t options, md_error_t *ep); 230 231 /* 232 * Private Constants 233 */ 234 235 static const int FORCE_RELOAD_CACHE = 1; 236 static const uint_t NO_FLAGS = 0; 237 static const sp_ext_offset_t NO_OFFSET = 0ULL; 238 static const uint_t NO_SEQUENCE_NUMBER = 0; 239 static const int ONE_SOFT_PARTITION = 1; 240 241 static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)]; 242 243 #define TEST_SOFT_PARTITION_NAMEP NULL 244 #define TEST_SETNAMEP NULL 245 246 #define EXCLUDE_WM (1) 247 #define INCLUDE_WM (0) 248 249 #define SP_UNALIGNED (0LL) 250 251 /* 252 * ************************************************************************** 253 * Debugging Functions * 254 * ************************************************************************** 255 */ 256 257 /*PRINTFLIKE1*/ 258 static void 259 meta_sp_debug(char *format, ...) 260 { 261 static int debug; 262 static int debug_set = 0; 263 va_list ap; 264 265 if (!debug_set) { 266 debug = getenv(META_SP_DEBUG) ? 1 : 0; 267 debug_set = 1; 268 } 269 270 if (debug) { 271 va_start(ap, format); 272 (void) vfprintf(stderr, format, ap); 273 va_end(ap); 274 } 275 } 276 277 static void 278 meta_sp_printunit(mp_unit_t *mp) 279 { 280 int i; 281 282 if (mp == NULL) 283 return; 284 285 /* print the common fields we know about */ 286 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 287 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 288 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 289 290 /* sp-specific fields */ 291 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 292 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 293 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 294 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 295 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 296 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 297 298 /* print extent information */ 299 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 300 for (i = 0; i < mp->un_numexts; i++) { 301 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 302 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 303 mp->un_ext[i].un_len); 304 } 305 } 306 307 /* 308 * FUNCTION: meta_sp_parsesize() 309 * INPUT: s - the string to parse 310 * OUTPUT: *szp - disk block count (0 for "all") 311 * RETURNS: -1 for error, 0 for success 312 * PURPOSE: parses the command line parameter that specifies the 313 * requested size of a soft partition. The input string 314 * is either the literal "all" or a numeric value 315 * followed by a single character, b for disk blocks, k 316 * for kilobytes, m for megabytes, g for gigabytes, or t 317 * for terabytes. p for petabytes and e for exabytes 318 * have been added as undocumented features for future 319 * expansion. For example, 100m is 100 megabytes, while 320 * 50g is 50 gigabytes. All values are rounded up to the 321 * nearest block size. 322 */ 323 int 324 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 325 { 326 if (s == NULL || szp == NULL) { 327 return (-1); 328 } 329 330 /* Check for literal "all" */ 331 if (strcasecmp(s, "all") == 0) { 332 *szp = 0; 333 return (0); 334 } 335 336 return (meta_sp_parsesizestring(s, szp)); 337 } 338 339 /* 340 * FUNCTION: meta_sp_parsesizestring() 341 * INPUT: s - the string to parse 342 * OUTPUT: *szp - disk block count 343 * RETURNS: -1 for error, 0 for success 344 * PURPOSE: parses a string that specifies size. The input string is a 345 * numeric value followed by a single character, b for disk blocks, 346 * k for kilobytes, m for megabytes, g for gigabytes, or t for 347 * terabytes. p for petabytes and e for exabytes have been added 348 * as undocumented features for future expansion. For example, 349 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 350 * are rounded up to the nearest block size. 351 */ 352 static int 353 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 354 { 355 sp_ext_length_t len = 0; 356 char len_type[2]; 357 358 if (s == NULL || szp == NULL) { 359 return (-1); 360 } 361 362 /* 363 * make sure block offset does not overflow 2^64 bytes. 364 */ 365 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 366 (len == 0LL) || 367 (len > (1LL << (64 - DEV_BSHIFT)))) 368 return (-1); 369 370 switch (len_type[0]) { 371 case 'B': 372 case 'b': 373 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 374 break; 375 case 'K': 376 case 'k': 377 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 378 break; 379 case 'M': 380 case 'm': 381 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 382 break; 383 case 'g': 384 case 'G': 385 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 386 break; 387 case 't': 388 case 'T': 389 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 390 DEV_BSIZE)); 391 break; 392 case 'p': 393 case 'P': 394 len = lbtodb(roundup( 395 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 396 DEV_BSIZE)); 397 break; 398 case 'e': 399 case 'E': 400 len = lbtodb(roundup( 401 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 402 DEV_BSIZE)); 403 break; 404 default: 405 /* error */ 406 return (-1); 407 } 408 409 *szp = len; 410 return (0); 411 } 412 413 /* 414 * FUNCTION: meta_sp_setgeom() 415 * INPUT: np - the underlying device to setup geometry for 416 * compnp - the underlying device to setup geometry for 417 * mp - the unit structure to set the geometry for 418 * OUTPUT: ep - return error pointer 419 * RETURNS: int - -1 if error, 0 otherwise 420 * PURPOSE: establishes geometry information for a device 421 */ 422 static int 423 meta_sp_setgeom( 424 mdname_t *np, 425 mdname_t *compnp, 426 mp_unit_t *mp, 427 md_error_t *ep 428 ) 429 { 430 mdgeom_t *geomp; 431 uint_t round_cyl = 0; 432 433 if ((geomp = metagetgeom(compnp, ep)) == NULL) 434 return (-1); 435 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 436 geomp->read_reinstruct, round_cyl, ep) != 0) 437 return (-1); 438 439 return (0); 440 } 441 442 /* 443 * FUNCTION: meta_sp_setstatus() 444 * INPUT: sp - the set name for the devices to set the status on 445 * minors - an array of minor numbers of devices to set status on 446 * num_units - number of entries in the array 447 * status - status value to set all units to 448 * OUTPUT: ep - return error pointer 449 * RETURNS: int - -1 if error, 0 success 450 * PURPOSE: sets the status of one or more soft partitions to the 451 * requested value 452 */ 453 int 454 meta_sp_setstatus( 455 mdsetname_t *sp, 456 minor_t *minors, 457 int num_units, 458 sp_status_t status, 459 md_error_t *ep 460 ) 461 { 462 md_sp_statusset_t status_params; 463 464 assert(minors != NULL); 465 466 /* update status of all soft partitions to the status passed in */ 467 (void) memset(&status_params, 0, sizeof (status_params)); 468 status_params.num_units = num_units; 469 status_params.new_status = status; 470 status_params.size = num_units * sizeof (minor_t); 471 status_params.minors = (uintptr_t)minors; 472 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 473 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 474 NULL) != 0) { 475 (void) mdstealerror(ep, &status_params.mde); 476 return (-1); 477 } 478 return (0); 479 } 480 481 /* 482 * FUNCTION: meta_get_sp_names() 483 * INPUT: sp - the set name to get soft partitions from 484 * options - options from the command line 485 * OUTPUT: nlpp - list of all soft partition names 486 * ep - return error pointer 487 * RETURNS: int - -1 if error, 0 success 488 * PURPOSE: returns a list of all soft partitions in the metadb 489 * for all devices in the specified set 490 */ 491 int 492 meta_get_sp_names( 493 mdsetname_t *sp, 494 mdnamelist_t **nlpp, 495 int options, 496 md_error_t *ep 497 ) 498 { 499 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 500 } 501 502 /* 503 * FUNCTION: meta_get_by_component() 504 * INPUT: sp - the set name to get soft partitions from 505 * compnp - the name of the device containing the soft 506 * partitions that will be returned 507 * force - 0 - reads cached namelist if available, 508 * 1 - reloads cached namelist, frees old namelist 509 * OUTPUT: nlpp - list of all soft partition names 510 * ep - return error pointer 511 * RETURNS: int - -1 error, otherwise the number of soft partitions 512 * found on the component (0 = none found). 513 * PURPOSE: returns a list of all soft partitions on a given device 514 * from the metadb information 515 */ 516 static int 517 meta_sp_get_by_component( 518 mdsetname_t *sp, 519 mdname_t *compnp, 520 mdnamelist_t **nlpp, 521 int force, 522 md_error_t *ep 523 ) 524 { 525 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 526 static int cached_count = 0; /* cached count */ 527 mdnamelist_t *spnlp = NULL; /* all sp names */ 528 mdnamelist_t *namep; /* list iterator */ 529 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 530 mdnamelist_t **cachetailpp; /* cache tail */ 531 md_sp_t *msp; /* unit structure */ 532 int count = 0; /* count of sp's */ 533 int err; 534 mdname_t *curnp; 535 536 if ((cached_list != NULL) && (!force)) { 537 /* return a copy of the cached list */ 538 for (namep = cached_list; namep != NULL; namep = namep->next) 539 tailpp = meta_namelist_append_wrapper(tailpp, 540 namep->namep); 541 return (cached_count); 542 } 543 544 /* free the cache and reset values to zeros to prepare for a new list */ 545 metafreenamelist(cached_list); 546 cached_count = 0; 547 cached_list = NULL; 548 cachetailpp = &cached_list; 549 *nlpp = NULL; 550 551 /* get all the softpartitions first of all */ 552 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 553 return (-1); 554 555 /* 556 * Now for each sp, see if it resides on the component we 557 * are interested in, if so then add it to our list 558 */ 559 for (namep = spnlp; namep != NULL; namep = namep->next) { 560 curnp = namep->namep; 561 562 /* get the unit structure */ 563 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 564 continue; 565 566 /* 567 * If the current soft partition is not on the same 568 * component, continue the search. If it is on the same 569 * component, add it to our namelist. 570 */ 571 err = meta_check_samedrive(compnp, msp->compnamep, ep); 572 if (err <= 0) { 573 /* not on the same device, check the next one */ 574 continue; 575 } 576 577 /* it's on the same drive */ 578 579 /* 580 * Check for overlapping partitions if the component is not 581 * a metadevice. 582 */ 583 if (!metaismeta(msp->compnamep)) { 584 /* 585 * if they're on the same drive, neither 586 * should be a metadevice if one isn't 587 */ 588 assert(!metaismeta(compnp)); 589 590 if (meta_check_overlap(msp->compnamep->cname, 591 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 592 continue; 593 594 /* in this case it's not an error for them to overlap */ 595 mdclrerror(ep); 596 } 597 598 /* Component is on the same device, add to the used list */ 599 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 600 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 601 curnp); 602 603 ++count; 604 ++cached_count; 605 } 606 607 assert(count == cached_count); 608 return (count); 609 610 out: 611 metafreenamelist(*nlpp); 612 *nlpp = NULL; 613 return (-1); 614 } 615 616 /* 617 * FUNCTION: meta_sp_get_default_alignment() 618 * INPUT: sp - the pertinent set name 619 * compnp - the name of the underlying component 620 * OUTPUT: ep - return error pointer 621 * RETURNS: sp_ext_length_t =0: no default alignment 622 * >0: default alignment 623 * PURPOSE: returns the default alignment for soft partitions to 624 * be built on top of the specified component or 625 * metadevice 626 */ 627 static sp_ext_length_t 628 meta_sp_get_default_alignment( 629 mdsetname_t *sp, 630 mdname_t *compnp, 631 md_error_t *ep 632 ) 633 { 634 sp_ext_length_t a = SP_UNALIGNED; 635 char *mname; 636 637 assert(compnp != NULL); 638 639 /* 640 * We treat raw devices as opaque, and assume nothing about 641 * their alignment requirements. 642 */ 643 if (!metaismeta(compnp)) 644 return (SP_UNALIGNED); 645 646 /* 647 * We already know it's a metadevice from the previous test; 648 * metagetmiscname() will tell us which metadevice type we 649 * have 650 */ 651 mname = metagetmiscname(compnp, ep); 652 if (mname == NULL) 653 goto out; 654 655 /* 656 * For a mirror, we want to deal with the stripe that is the 657 * primary side. If it happens to be asymmetrically 658 * configured, there is no simple way to fake a universal 659 * alignment. There's a chance that the least common 660 * denominator of the set of interlaces from all stripes of 661 * all submirrors would do it, but nobody that really cared 662 * that much about this issue would create an asymmetric 663 * config to start with. 664 * 665 * If the component underlying the soft partition is a mirror, 666 * then at the exit of this loop, compnp will have been 667 * updated to describe the first active submirror. 668 */ 669 if (strcmp(mname, MD_MIRROR) == 0) { 670 md_mirror_t *mp; 671 int smi; 672 md_submirror_t *smp; 673 674 mp = meta_get_mirror(sp, compnp, ep); 675 if (mp == NULL) 676 goto out; 677 678 for (smi = 0; smi < NMIRROR; smi++) { 679 680 smp = &mp->submirrors[smi]; 681 if (smp->state == SMS_UNUSED) 682 continue; 683 684 compnp = smp->submirnamep; 685 assert(compnp != NULL); 686 687 mname = metagetmiscname(compnp, ep); 688 if (mname == NULL) 689 goto out; 690 691 break; 692 } 693 694 if (smi == NMIRROR) 695 goto out; 696 } 697 698 /* 699 * Handle stripes and submirrors identically; just return the 700 * interlace of the first row. 701 */ 702 if (strcmp(mname, MD_STRIPE) == 0) { 703 md_stripe_t *stp; 704 705 stp = meta_get_stripe(sp, compnp, ep); 706 if (stp == NULL) 707 goto out; 708 709 a = stp->rows.rows_val[0].interlace; 710 goto out; 711 } 712 713 /* 714 * Raid is even more straightforward; the interlace applies to 715 * the entire device. 716 */ 717 if (strcmp(mname, MD_RAID) == 0) { 718 md_raid_t *rp; 719 720 rp = meta_get_raid(sp, compnp, ep); 721 if (rp == NULL) 722 goto out; 723 724 a = rp->interlace; 725 goto out; 726 } 727 728 /* 729 * If we have arrived here with the alignment still not set, 730 * then we expect the error to have been set by one of the 731 * routines we called. If neither is the case, something has 732 * really gone wrong above. (Probably the submirror walk 733 * failed to produce a valid submirror, but that would be 734 * really bad...) 735 */ 736 out: 737 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 738 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 739 740 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 741 mde_perror(ep, NULL); 742 } 743 744 assert((a > 0) || (!mdisok(ep))); 745 746 return (a); 747 } 748 749 750 751 /* 752 * FUNCTION: meta_check_insp() 753 * INPUT: sp - the set name for the device to check 754 * np - the name of the device to check 755 * slblk - the starting offset of the device to check 756 * nblks - the number of blocks in the device to check 757 * OUTPUT: ep - return error pointer 758 * RETURNS: int - 0 - device contains soft partitions 759 * -1 - device does not contain soft partitions 760 * PURPOSE: determines whether a device contains any soft partitions 761 */ 762 /* ARGSUSED */ 763 int 764 meta_check_insp( 765 mdsetname_t *sp, 766 mdname_t *np, 767 diskaddr_t slblk, 768 diskaddr_t nblks, 769 md_error_t *ep 770 ) 771 { 772 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 773 int count; 774 int rval; 775 776 /* check set pointer */ 777 assert(sp != NULL); 778 779 /* 780 * Get a list of the soft partitions that currently reside on 781 * the component. We should ALWAYS force reload the cache, 782 * because if we're using the md.tab, we must rebuild 783 * the list because it won't contain the previous (if any) 784 * soft partition. 785 */ 786 /* find all soft partitions on the component */ 787 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 788 789 if (count == -1) { 790 rval = -1; 791 } else if (count > 0) { 792 rval = mduseerror(ep, MDE_ALREADY, np->dev, 793 spnlp->namep->cname, np->cname); 794 } else { 795 rval = 0; 796 } 797 798 metafreenamelist(spnlp); 799 return (rval); 800 } 801 802 /* 803 * ************************************************************************** 804 * Extent List Manipulation Functions * 805 * ************************************************************************** 806 */ 807 808 /* 809 * FUNCTION: meta_sp_cmp_by_nameseq() 810 * INPUT: e1 - first node to compare 811 * e2 - second node to compare 812 * OUTPUT: none 813 * RETURNS: int - =0 - nodes are equal 814 * <0 - e1 should go before e2 815 * >0 - e1 should go after e2 816 * PURPOSE: used for sorted list inserts to build a list sorted by 817 * name first and sequence number second. 818 */ 819 static int 820 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 821 { 822 int rval; 823 824 if (e1->ext_namep == NULL) 825 return (1); 826 if (e2->ext_namep == NULL) 827 return (-1); 828 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 829 return (rval); 830 831 /* the names are equal, compare sequence numbers */ 832 if (e1->ext_seq > e2->ext_seq) 833 return (1); 834 if (e1->ext_seq < e2->ext_seq) 835 return (-1); 836 /* sequence numbers are also equal */ 837 return (0); 838 } 839 840 /* 841 * FUNCTION: meta_sp_cmp_by_offset() 842 * INPUT: e1 - first node to compare 843 * e2 - second node to compare 844 * OUTPUT: none 845 * RETURNS: int - =0 - nodes are equal 846 * <0 - e1 should go before e2 847 * >0 - e1 should go after e2 848 * PURPOSE: used for sorted list inserts to build a list sorted by offset 849 */ 850 static int 851 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 852 { 853 if (e1->ext_offset > e2->ext_offset) 854 return (1); 855 if (e1->ext_offset < e2->ext_offset) 856 return (-1); 857 /* offsets are equal */ 858 return (0); 859 } 860 861 /* 862 * FUNCTION: meta_sp_list_insert() 863 * INPUT: sp - the set name for the device the node belongs to 864 * np - the name of the device the node belongs to 865 * head - the head of the list, must be NULL for empty list 866 * offset - the physical offset of this extent in sectors 867 * length - the length of this extent in sectors 868 * type - the type of the extent being inserted 869 * seq - the sequence number of the extent being inserted 870 * flags - extent flags (eg. whether it needs to be updated) 871 * compare - the compare function to use 872 * OUTPUT: head - points to the new head if a node was inserted 873 * at the beginning 874 * RETURNS: void 875 * PURPOSE: inserts an extent node into a sorted doubly linked list. 876 * The sort order is determined by the compare function. 877 * Memory is allocated for the node in this function and it 878 * is up to the caller to free it, possibly using 879 * meta_sp_list_free(). If a node is inserted at the 880 * beginning of the list, the head pointer is updated to 881 * point to the new first node. 882 */ 883 static void 884 meta_sp_list_insert( 885 mdsetname_t *sp, 886 mdname_t *np, 887 sp_ext_node_t **head, 888 sp_ext_offset_t offset, 889 sp_ext_length_t length, 890 sp_ext_type_t type, 891 uint_t seq, 892 uint_t flags, 893 ext_cmpfunc_t compare 894 ) 895 { 896 sp_ext_node_t *newext; 897 sp_ext_node_t *curext; 898 899 assert(head != NULL); 900 901 /* Don't bother adding zero length nodes */ 902 if (length == 0ULL) 903 return; 904 905 /* allocate and fill in new ext_node */ 906 newext = Zalloc(sizeof (sp_ext_node_t)); 907 908 newext->ext_offset = offset; 909 newext->ext_length = length; 910 newext->ext_flags = flags; 911 newext->ext_type = type; 912 newext->ext_seq = seq; 913 newext->ext_setp = sp; 914 newext->ext_namep = np; 915 916 /* first node in the list */ 917 if (*head == NULL) { 918 newext->ext_next = newext->ext_prev = NULL; 919 *head = newext; 920 } else if ((*compare)(*head, newext) >= 0) { 921 /* the first node has a bigger offset, so insert before it */ 922 assert((*head)->ext_prev == NULL); 923 924 newext->ext_prev = NULL; 925 newext->ext_next = *head; 926 (*head)->ext_prev = newext; 927 *head = newext; 928 } else { 929 /* 930 * find the next node whose offset is greater than 931 * the one we want to insert, or the end of the list. 932 */ 933 for (curext = *head; 934 (curext->ext_next != NULL) && 935 ((*compare)(curext->ext_next, newext) < 0); 936 (curext = curext->ext_next)) 937 ; 938 939 /* link the new node in after the current node */ 940 newext->ext_next = curext->ext_next; 941 newext->ext_prev = curext; 942 943 if (curext->ext_next != NULL) 944 curext->ext_next->ext_prev = newext; 945 946 curext->ext_next = newext; 947 } 948 } 949 950 /* 951 * FUNCTION: meta_sp_list_free() 952 * INPUT: head - the head of the list, must be NULL for empty list 953 * OUTPUT: head - points to NULL on return 954 * RETURNS: void 955 * PURPOSE: walks a double linked extent list and frees each node 956 */ 957 static void 958 meta_sp_list_free(sp_ext_node_t **head) 959 { 960 sp_ext_node_t *ext; 961 sp_ext_node_t *next; 962 963 assert(head != NULL); 964 965 ext = *head; 966 while (ext) { 967 next = ext->ext_next; 968 Free(ext); 969 ext = next; 970 } 971 *head = NULL; 972 } 973 974 /* 975 * FUNCTION: meta_sp_list_remove() 976 * INPUT: head - the head of the list, must be NULL for empty list 977 * ext - the extent to remove, must be a member of the list 978 * OUTPUT: head - points to the new head of the list 979 * RETURNS: void 980 * PURPOSE: unlinks the node specified by ext from the list and 981 * frees it, possibly moving the head pointer forward if 982 * the head is the node being removed. 983 */ 984 static void 985 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 986 { 987 assert(head != NULL); 988 assert(*head != NULL); 989 990 if (*head == ext) 991 *head = ext->ext_next; 992 993 if (ext->ext_prev != NULL) 994 ext->ext_prev->ext_next = ext->ext_next; 995 if (ext->ext_next != NULL) 996 ext->ext_next->ext_prev = ext->ext_prev; 997 Free(ext); 998 } 999 1000 /* 1001 * FUNCTION: meta_sp_list_size() 1002 * INPUT: head - the head of the list, must be NULL for empty list 1003 * exttype - the type of the extents to sum 1004 * exclude_wm - subtract space for extent headers from total 1005 * OUTPUT: none 1006 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1007 * PURPOSE: sums the lengths of all extents in the list matching the 1008 * specified type. This could be used for computing the 1009 * amount of free or used space, for example. 1010 */ 1011 static sp_ext_length_t 1012 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1013 { 1014 sp_ext_node_t *ext; 1015 sp_ext_length_t size = 0LL; 1016 1017 for (ext = head; ext != NULL; ext = ext->ext_next) 1018 if (ext->ext_type == exttype) 1019 size += ext->ext_length - 1020 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1021 1022 return (size); 1023 } 1024 1025 /* 1026 * FUNCTION: meta_sp_list_find() 1027 * INPUT: head - the head of the list, must be NULL for empty list 1028 * offset - the offset contained by the node to find 1029 * OUTPUT: none 1030 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1031 * or NULL if no such nodes were found. 1032 * PURPOSE: finds a node in a list containing the requested offset 1033 * (inclusive). If multiple nodes contain this offset then 1034 * only the first will be returned, though typically these 1035 * lists are managed with non-overlapping nodes. 1036 * 1037 * *The list MUST be sorted by offset for this function to work.* 1038 */ 1039 static sp_ext_node_t * 1040 meta_sp_list_find( 1041 sp_ext_node_t *head, 1042 sp_ext_offset_t offset 1043 ) 1044 { 1045 sp_ext_node_t *ext; 1046 1047 for (ext = head; ext != NULL; ext = ext->ext_next) { 1048 /* check if the offset lies within this extent */ 1049 if ((offset >= ext->ext_offset) && 1050 (offset < ext->ext_offset + ext->ext_length)) { 1051 /* 1052 * the requested extent should always be a 1053 * subset of an extent in the list. 1054 */ 1055 return (ext); 1056 } 1057 } 1058 return (NULL); 1059 } 1060 1061 /* 1062 * FUNCTION: meta_sp_list_freefill() 1063 * INPUT: head - the head of the list, must be NULL for empty list 1064 * size - the size of the volume this extent list is 1065 * representing 1066 * OUTPUT: head - the new head of the list 1067 * RETURNS: void 1068 * PURPOSE: finds gaps in the extent list and fills them with a free 1069 * node. If there is a gap at the beginning the head 1070 * pointer will be changed to point to the new free node. 1071 * If there is free space at the end, the last free extent 1072 * will extend all the way out to the size specified. 1073 * 1074 * *The list MUST be sorted by offset for this function to work.* 1075 */ 1076 static void 1077 meta_sp_list_freefill( 1078 sp_ext_node_t **head, 1079 sp_ext_length_t size 1080 ) 1081 { 1082 sp_ext_node_t *ext; 1083 sp_ext_offset_t curoff = 0LL; 1084 1085 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1086 if (curoff < ext->ext_offset) 1087 meta_sp_list_insert(NULL, NULL, head, 1088 curoff, ext->ext_offset - curoff, 1089 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1090 curoff = ext->ext_offset + ext->ext_length; 1091 } 1092 1093 /* pad inverse list out to the end */ 1094 if (curoff < size) 1095 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1096 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1097 1098 if (getenv(META_SP_DEBUG)) { 1099 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1100 "holes freefilled:\n"); 1101 meta_sp_list_dump(*head); 1102 } 1103 } 1104 1105 /* 1106 * FUNCTION: meta_sp_list_dump() 1107 * INPUT: head - the head of the list, must be NULL for empty list 1108 * OUTPUT: none 1109 * RETURNS: void 1110 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1111 */ 1112 static void 1113 meta_sp_list_dump(sp_ext_node_t *head) 1114 { 1115 sp_ext_node_t *ext; 1116 1117 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1118 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1119 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1120 "Next"); 1121 for (ext = head; ext != NULL; ext = ext->ext_next) { 1122 if (ext->ext_namep != NULL) 1123 meta_sp_debug("%5s", ext->ext_namep->cname); 1124 else 1125 meta_sp_debug("%5s", "NONE"); 1126 1127 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1128 switch (ext->ext_type) { 1129 case EXTTYP_ALLOC: 1130 meta_sp_debug("%7s ", "ALLOC"); 1131 break; 1132 case EXTTYP_FREE: 1133 meta_sp_debug("%7s ", "FREE"); 1134 break; 1135 case EXTTYP_END: 1136 meta_sp_debug("%7s ", "END"); 1137 break; 1138 case EXTTYP_RESERVED: 1139 meta_sp_debug("%7s ", "RESV"); 1140 break; 1141 default: 1142 meta_sp_debug("%7s ", "INVLD"); 1143 break; 1144 } 1145 1146 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1147 ext->ext_offset, ext->ext_length, 1148 ext->ext_flags, (void *) ext->ext_prev, 1149 (void *) ext->ext_next); 1150 } 1151 meta_sp_debug("\n"); 1152 } 1153 1154 /* 1155 * FUNCTION: meta_sp_list_overlaps() 1156 * INPUT: head - the head of the list, must be NULL for empty list 1157 * OUTPUT: none 1158 * RETURNS: int - 1 if extents overlap, 0 if ok 1159 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1160 * offset for this function to work properly. 1161 */ 1162 static int 1163 meta_sp_list_overlaps(sp_ext_node_t *head) 1164 { 1165 sp_ext_node_t *ext; 1166 1167 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1168 if (ext->ext_offset + ext->ext_length > 1169 ext->ext_next->ext_offset) 1170 return (1); 1171 } 1172 return (0); 1173 } 1174 1175 /* 1176 * ************************************************************************** 1177 * Extent Allocation Functions * 1178 * ************************************************************************** 1179 */ 1180 1181 /* 1182 * FUNCTION: meta_sp_alloc_by_ext() 1183 * INPUT: sp - the set name for the device the node belongs to 1184 * np - the name of the device the node belongs to 1185 * head - the head of the list, must be NULL for empty list 1186 * free_ext - the free extent being allocated from 1187 * alloc_offset - the offset of the allocation 1188 * alloc_len - the length of the allocation 1189 * seq - the sequence number of the allocation 1190 * OUTPUT: head - the new head pointer 1191 * RETURNS: void 1192 * PURPOSE: allocates a portion of the free extent free_ext. The 1193 * allocated portion starts at alloc_offset and is 1194 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1195 * alloc_length) must be contained within the free extent. 1196 * 1197 * The free extent is split into as many as 3 pieces - a 1198 * free extent containing [ free_offset .. alloc_offset ), an 1199 * allocated extent containing the range [ alloc_offset .. 1200 * alloc_end ], and another free extent containing the 1201 * range ( alloc_end .. free_end ]. If either of the two 1202 * new free extents would be zero length, they are not created. 1203 * 1204 * Finally, the original free extent is removed. All newly 1205 * created extents have the EXTFLG_UPDATE flag set. 1206 */ 1207 static void 1208 meta_sp_alloc_by_ext( 1209 mdsetname_t *sp, 1210 mdname_t *np, 1211 sp_ext_node_t **head, 1212 sp_ext_node_t *free_ext, 1213 sp_ext_offset_t alloc_offset, 1214 sp_ext_length_t alloc_length, 1215 uint_t seq 1216 ) 1217 { 1218 sp_ext_offset_t free_offset = free_ext->ext_offset; 1219 sp_ext_length_t free_length = free_ext->ext_length; 1220 1221 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1222 sp_ext_offset_t free_end = free_offset + free_length; 1223 1224 /* allocated extent must be a subset of the free extent */ 1225 assert(free_offset <= alloc_offset); 1226 assert(free_end >= alloc_end); 1227 1228 meta_sp_list_remove(head, free_ext); 1229 1230 if (free_offset < alloc_offset) { 1231 meta_sp_list_insert(NULL, NULL, head, free_offset, 1232 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1233 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1234 } 1235 1236 if (free_end > alloc_end) { 1237 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1238 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1239 meta_sp_cmp_by_offset); 1240 } 1241 1242 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1243 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1244 1245 if (getenv(META_SP_DEBUG)) { 1246 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1247 meta_sp_list_dump(*head); 1248 } 1249 } 1250 1251 /* 1252 * FUNCTION: meta_sp_alloc_by_len() 1253 * INPUT: sp - the set name for the device the node belongs to 1254 * np - the name of the device the node belongs to 1255 * head - the head of the list, must be NULL for empty list 1256 * *lp - the requested length to allocate 1257 * last_off - the last offset already allocated. 1258 * alignment - the desired extent alignmeent 1259 * OUTPUT: head - the new head pointer 1260 * *lp - the length allocated 1261 * RETURNS: int - -1 if error, the number of new extents on success 1262 * PURPOSE: allocates extents from free space to satisfy the requested 1263 * length. If requested length is zero, allocates all 1264 * remaining free space. This function provides the meat 1265 * of the extent allocation algorithm. Allocation is a 1266 * three tier process: 1267 * 1268 * 1. If last_off is nonzero and there is free space following 1269 * that node, then it is extended to allocate as much of that 1270 * free space as possible. This is useful for metattach. 1271 * 2. If a free extent can be found to satisfy the remaining 1272 * requested space, then satisfy the rest of the request 1273 * from that extent. 1274 * 3. Start allocating space from any remaining free extents until 1275 * the remainder of the request is satisified. 1276 * 1277 * If alignment is non-zero, then every extent modified 1278 * or newly allocated will be aligned modulo alignment, 1279 * with a length that is an integer multiple of 1280 * alignment. 1281 * 1282 * The EXTFLG_UPDATE flag is set for all nodes (free and 1283 * allocated) that require updated watermarks. 1284 * 1285 * This algorithm may have a negative impact on fragmentation 1286 * in pathological cases and may be improved if it turns out 1287 * to be a problem. This may be exacerbated by particularly 1288 * large alignments. 1289 * 1290 * NOTE: It's confusing, so it demands an explanation: 1291 * - len is used to represent requested data space; it 1292 * does not include room for a watermark. On each full 1293 * or partial allocation, len will be decremented by 1294 * alloc_len (see next paragraph) until it reaches 1295 * zero. 1296 * - alloc_len is used to represent data space allocated 1297 * from a particular extent; it does not include space 1298 * for a watermark. In the rare event that a_length 1299 * (see next paragraph) is equal to MD_SP_WMSIZE, 1300 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1301 * fragment of space will be utterly unusable. 1302 * - a_length is used to represent all space to be 1303 * allocated from a particular extent; it DOES include 1304 * space for a watermark. 1305 */ 1306 static int 1307 meta_sp_alloc_by_len( 1308 mdsetname_t *sp, 1309 mdname_t *np, 1310 sp_ext_node_t **head, 1311 sp_ext_length_t *lp, 1312 sp_ext_offset_t last_off, 1313 sp_ext_offset_t alignment 1314 ) 1315 { 1316 sp_ext_node_t *free_ext; 1317 sp_ext_node_t *alloc_ext; 1318 uint_t last_seq = 0; 1319 uint_t numexts = 0; 1320 sp_ext_length_t freespace; 1321 sp_ext_length_t alloc_len; 1322 sp_ext_length_t len; 1323 1324 /* We're DOA if we can't read *lp */ 1325 assert(lp != NULL); 1326 len = *lp; 1327 1328 /* 1329 * Process the nominal case first: we've been given an actual 1330 * size argument, rather than the literal "all" 1331 */ 1332 1333 if (len != 0) { 1334 1335 /* 1336 * Short circuit the check for free space. This may 1337 * tell us we have enough space when we really don't 1338 * because each extent loses space to a watermark, but 1339 * it will always tell us there isn't enough space 1340 * correctly. Worst case we do some extra work. 1341 */ 1342 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1343 INCLUDE_WM); 1344 1345 if (freespace < len) 1346 return (-1); 1347 1348 /* 1349 * First see if we can extend the last extent for an 1350 * attach. 1351 */ 1352 if (last_off != 0LL) { 1353 int align = 0; 1354 1355 alloc_ext = 1356 meta_sp_list_find(*head, last_off); 1357 assert(alloc_ext != NULL); 1358 1359 /* 1360 * The offset test reflects the 1361 * inclusion of the watermark in the extent 1362 */ 1363 align = (alignment > 0) && 1364 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1365 alignment) == 0); 1366 1367 /* 1368 * If we decided not to align here, we should 1369 * also reset "alignment" so we don't bother 1370 * later, either. 1371 */ 1372 if (!align) { 1373 alignment = 0; 1374 } 1375 1376 last_seq = alloc_ext->ext_seq; 1377 1378 free_ext = meta_sp_list_find(*head, 1379 alloc_ext->ext_offset + 1380 alloc_ext->ext_length); 1381 1382 /* 1383 * If a free extent follows our last allocated 1384 * extent, then remove the last allocated 1385 * extent and increase the size of the free 1386 * extent to overlap it, then allocate the 1387 * total space from the new free extent. 1388 */ 1389 if (free_ext != NULL && 1390 free_ext->ext_type == EXTTYP_FREE) { 1391 assert(free_ext->ext_offset == 1392 alloc_ext->ext_offset + 1393 alloc_ext->ext_length); 1394 1395 alloc_len = 1396 MIN(len, free_ext->ext_length); 1397 1398 if (align && (alloc_len < len)) { 1399 /* No watermark space needed */ 1400 alloc_len -= alloc_len % alignment; 1401 } 1402 1403 if (alloc_len > 0) { 1404 free_ext->ext_offset -= 1405 alloc_ext->ext_length; 1406 free_ext->ext_length += 1407 alloc_ext->ext_length; 1408 1409 meta_sp_alloc_by_ext(sp, np, head, 1410 free_ext, free_ext->ext_offset, 1411 alloc_ext->ext_length + alloc_len, 1412 last_seq); 1413 1414 /* 1415 * now remove the original allocated 1416 * node. We may have overlapping 1417 * extents for a short time before 1418 * this node is removed. 1419 */ 1420 meta_sp_list_remove(head, alloc_ext); 1421 len -= alloc_len; 1422 } 1423 } 1424 last_seq++; 1425 } 1426 1427 if (len == 0LL) 1428 goto out; 1429 1430 /* 1431 * Next, see if we can find a single allocation for 1432 * the remainder. This may make fragmentation worse 1433 * in some cases, but there's no good way to allocate 1434 * that doesn't have a highly fragmented corner case. 1435 */ 1436 for (free_ext = *head; free_ext != NULL; 1437 free_ext = free_ext->ext_next) { 1438 sp_ext_offset_t a_offset; 1439 sp_ext_offset_t a_length; 1440 1441 if (free_ext->ext_type != EXTTYP_FREE) 1442 continue; 1443 1444 /* 1445 * The length test should include space for 1446 * the watermark 1447 */ 1448 1449 a_offset = free_ext->ext_offset; 1450 a_length = free_ext->ext_length; 1451 1452 if (alignment > 0) { 1453 1454 /* 1455 * Shortcut for extents that have been 1456 * previously added to pad out the 1457 * data space 1458 */ 1459 if (a_length < alignment) { 1460 continue; 1461 } 1462 1463 /* 1464 * Round up so the data space begins 1465 * on a properly aligned boundary. 1466 */ 1467 a_offset += alignment - 1468 (a_offset % alignment) - MD_SP_WMSIZE; 1469 1470 /* 1471 * This is only necessary in case the 1472 * watermark size is ever greater than 1473 * one. It'll never happen, of 1474 * course; we'll get rid of watermarks 1475 * before we make 'em bigger. 1476 */ 1477 if (a_offset < free_ext->ext_offset) { 1478 a_offset += alignment; 1479 } 1480 1481 /* 1482 * Adjust the length to account for 1483 * the space lost above (if any) 1484 */ 1485 a_length -= 1486 (a_offset - free_ext->ext_offset); 1487 } 1488 1489 if (a_length >= len + MD_SP_WMSIZE) { 1490 meta_sp_alloc_by_ext(sp, np, head, 1491 free_ext, a_offset, 1492 len + MD_SP_WMSIZE, last_seq); 1493 1494 len = 0LL; 1495 numexts++; 1496 break; 1497 } 1498 } 1499 1500 if (len == 0LL) 1501 goto out; 1502 1503 1504 /* 1505 * If the request could not be satisfied by extending 1506 * the last extent or by a single extent, then put 1507 * multiple smaller extents together until the request 1508 * is satisfied. 1509 */ 1510 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1511 free_ext = free_ext->ext_next) { 1512 sp_ext_offset_t a_offset; 1513 sp_ext_length_t a_length; 1514 1515 if (free_ext->ext_type != EXTTYP_FREE) 1516 continue; 1517 1518 a_offset = free_ext->ext_offset; 1519 a_length = free_ext->ext_length; 1520 1521 if (alignment > 0) { 1522 1523 /* 1524 * Shortcut for extents that have been 1525 * previously added to pad out the 1526 * data space 1527 */ 1528 if (a_length < alignment) { 1529 continue; 1530 } 1531 1532 /* 1533 * Round up so the data space begins 1534 * on a properly aligned boundary. 1535 */ 1536 a_offset += alignment - 1537 (a_offset % alignment) - MD_SP_WMSIZE; 1538 1539 /* 1540 * This is only necessary in case the 1541 * watermark size is ever greater than 1542 * one. It'll never happen, of 1543 * course; we'll get rid of watermarks 1544 * before we make 'em bigger. 1545 */ 1546 if (a_offset < free_ext->ext_offset) { 1547 a_offset += alignment; 1548 } 1549 1550 /* 1551 * Adjust the length to account for 1552 * the space lost above (if any) 1553 */ 1554 a_length -= 1555 (a_offset - free_ext->ext_offset); 1556 1557 /* 1558 * Adjust the length to be properly 1559 * aligned if it is NOT to be the 1560 * last extent in the soft partition. 1561 */ 1562 if ((a_length - MD_SP_WMSIZE) < len) 1563 a_length -= 1564 (a_length - MD_SP_WMSIZE) 1565 % alignment; 1566 } 1567 1568 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1569 if (alloc_len == 0) 1570 continue; 1571 1572 /* 1573 * meta_sp_alloc_by_ext() expects the 1574 * allocation length to include the watermark 1575 * size, which is why we don't simply pass in 1576 * alloc_len here. 1577 */ 1578 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1579 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1580 last_seq); 1581 1582 len -= alloc_len; 1583 numexts++; 1584 last_seq++; 1585 } 1586 1587 1588 /* 1589 * If there was not enough space we can throw it all 1590 * away since no real work has been done yet. 1591 */ 1592 if (len != 0) { 1593 meta_sp_list_free(head); 1594 return (-1); 1595 } 1596 } 1597 1598 /* 1599 * Otherwise, the literal "all" was specified: allocate all 1600 * available free space. Don't bother with alignment. 1601 */ 1602 else { 1603 /* First, extend the last extent if this is a grow */ 1604 if (last_off != 0LL) { 1605 alloc_ext = 1606 meta_sp_list_find(*head, last_off); 1607 assert(alloc_ext != NULL); 1608 1609 last_seq = alloc_ext->ext_seq; 1610 1611 free_ext = meta_sp_list_find(*head, 1612 alloc_ext->ext_offset + 1613 alloc_ext->ext_length); 1614 1615 /* 1616 * If a free extent follows our last allocated 1617 * extent, then remove the last allocated 1618 * extent and increase the size of the free 1619 * extent to overlap it, then allocate the 1620 * total space from the new free extent. 1621 */ 1622 if (free_ext != NULL && 1623 free_ext->ext_type == EXTTYP_FREE) { 1624 assert(free_ext->ext_offset == 1625 alloc_ext->ext_offset + 1626 alloc_ext->ext_length); 1627 1628 len = alloc_len = 1629 free_ext->ext_length; 1630 1631 free_ext->ext_offset -= 1632 alloc_ext->ext_length; 1633 free_ext->ext_length += 1634 alloc_ext->ext_length; 1635 1636 meta_sp_alloc_by_ext(sp, np, head, 1637 free_ext, free_ext->ext_offset, 1638 alloc_ext->ext_length + alloc_len, 1639 last_seq); 1640 1641 /* 1642 * now remove the original allocated 1643 * node. We may have overlapping 1644 * extents for a short time before 1645 * this node is removed. 1646 */ 1647 meta_sp_list_remove(head, alloc_ext); 1648 } 1649 1650 last_seq++; 1651 } 1652 1653 /* Next, grab all remaining free space */ 1654 for (free_ext = *head; free_ext != NULL; 1655 free_ext = free_ext->ext_next) { 1656 1657 if (free_ext->ext_type == EXTTYP_FREE) { 1658 alloc_len = 1659 free_ext->ext_length - MD_SP_WMSIZE; 1660 if (alloc_len == 0) 1661 continue; 1662 1663 /* 1664 * meta_sp_alloc_by_ext() expects the 1665 * allocation length to include the 1666 * watermark size, which is why we 1667 * don't simply pass in alloc_len 1668 * here. 1669 */ 1670 meta_sp_alloc_by_ext(sp, np, head, 1671 free_ext, free_ext->ext_offset, 1672 free_ext->ext_length, 1673 last_seq); 1674 1675 len += alloc_len; 1676 numexts++; 1677 last_seq++; 1678 } 1679 } 1680 } 1681 1682 out: 1683 if (getenv(META_SP_DEBUG)) { 1684 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1685 "allocation:\n"); 1686 meta_sp_list_dump(*head); 1687 } 1688 1689 if (*lp == 0) { 1690 *lp = len; 1691 1692 /* 1693 * Make sure the callers hit a no space error if we 1694 * didn't actually find anything. 1695 */ 1696 if (len == 0) { 1697 return (-1); 1698 } 1699 } 1700 1701 return (numexts); 1702 } 1703 1704 /* 1705 * FUNCTION: meta_sp_alloc_by_list() 1706 * INPUT: sp - the set name for the device the node belongs to 1707 * np - the name of the device the node belongs to 1708 * head - the head of the list, must be NULL for empty list 1709 * oblist - an extent list containing requested nodes to allocate 1710 * OUTPUT: head - the new head pointer 1711 * RETURNS: int - -1 if error, the number of new extents on success 1712 * PURPOSE: allocates extents from free space to satisfy the requested 1713 * extent list. This is primarily used for the -o/-b options 1714 * where the user may specifically request extents to allocate. 1715 * Each extent in the oblist must be a subset (inclusive) of a 1716 * free extent and may not overlap each other. This 1717 * function sets the EXTFLG_UPDATE flag for each node that 1718 * requires a watermark update after allocating. 1719 */ 1720 static int 1721 meta_sp_alloc_by_list( 1722 mdsetname_t *sp, 1723 mdname_t *np, 1724 sp_ext_node_t **head, 1725 sp_ext_node_t *oblist 1726 ) 1727 { 1728 sp_ext_node_t *ext; 1729 sp_ext_node_t *free_ext; 1730 uint_t numexts = 0; 1731 1732 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1733 1734 free_ext = meta_sp_list_find(*head, 1735 ext->ext_offset - MD_SP_WMSIZE); 1736 1737 /* Make sure the allocation is within the free extent */ 1738 if ((free_ext == NULL) || 1739 (ext->ext_offset + ext->ext_length > 1740 free_ext->ext_offset + free_ext->ext_length) || 1741 (free_ext->ext_type != EXTTYP_FREE)) 1742 return (-1); 1743 1744 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1745 ext->ext_offset - MD_SP_WMSIZE, 1746 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1747 1748 numexts++; 1749 } 1750 1751 assert(meta_sp_list_overlaps(*head) == 0); 1752 1753 if (getenv(META_SP_DEBUG)) { 1754 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1755 "allocation:\n"); 1756 meta_sp_list_dump(*head); 1757 } 1758 1759 return (numexts); 1760 } 1761 1762 /* 1763 * ************************************************************************** 1764 * Extent List Population Functions * 1765 * ************************************************************************** 1766 */ 1767 1768 /* 1769 * FUNCTION: meta_sp_extlist_from_namelist() 1770 * INPUT: sp - the set name for the device the node belongs to 1771 * spnplp - the namelist of soft partitions to build a list from 1772 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1773 * ep - return error pointer 1774 * RETURNS: int - -1 if error, 0 on success 1775 * PURPOSE: builds an extent list representing the soft partitions 1776 * specified in the namelist. Each extent in each soft 1777 * partition is added to the list with the type EXTTYP_ALLOC. 1778 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1779 * extent in the list includes the space occupied by the 1780 * watermark, which is not included in the unit structures. 1781 */ 1782 static int 1783 meta_sp_extlist_from_namelist( 1784 mdsetname_t *sp, 1785 mdnamelist_t *spnlp, 1786 sp_ext_node_t **extlist, 1787 md_error_t *ep 1788 ) 1789 { 1790 int extn; 1791 md_sp_t *msp; /* unit structure of the sp's */ 1792 mdnamelist_t *namep; 1793 1794 assert(sp != NULL); 1795 1796 /* 1797 * Now go through the soft partitions and add a node to the used 1798 * list for each allocated extent. 1799 */ 1800 for (namep = spnlp; namep != NULL; namep = namep->next) { 1801 mdname_t *curnp = namep->namep; 1802 1803 /* get the unit structure */ 1804 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1805 return (-1); 1806 1807 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1808 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1809 1810 /* 1811 * subtract from offset and add to the length 1812 * to account for the watermark, which is not 1813 * contained in the extents in the unit structure. 1814 */ 1815 meta_sp_list_insert(sp, curnp, extlist, 1816 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1817 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1818 } 1819 } 1820 return (0); 1821 } 1822 1823 /* 1824 * FUNCTION: meta_sp_extlist_from_wm() 1825 * INPUT: sp - the set name for the device the node belongs to 1826 * compnp - the name of the device to scan watermarks on 1827 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1828 * ep - return error pointer 1829 * RETURNS: int - -1 if error, 0 on success 1830 * PURPOSE: builds an extent list representing the soft partitions 1831 * specified in the namelist. Each extent in each soft 1832 * partition is added to the list with the type EXTTYP_ALLOC. 1833 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1834 * extent in the list includes the space occupied by the 1835 * watermark, which is not included in the unit structures. 1836 */ 1837 static int 1838 meta_sp_extlist_from_wm( 1839 mdsetname_t *sp, 1840 mdname_t *compnp, 1841 sp_ext_node_t **extlist, 1842 ext_cmpfunc_t compare, 1843 md_error_t *ep 1844 ) 1845 { 1846 mp_watermark_t wm; 1847 mdname_t *np = NULL; 1848 mdsetname_t *spsetp = NULL; 1849 sp_ext_offset_t cur_off; 1850 md_set_desc *sd; 1851 int init = 0; 1852 mdkey_t key; 1853 minor_t mnum; 1854 1855 if (!metaislocalset(sp)) { 1856 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1857 return (-1); 1858 } 1859 1860 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1861 return (-1); 1862 1863 for (;;) { 1864 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1865 return (-1); 1866 } 1867 1868 /* get the set and name pointers */ 1869 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1870 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1871 return (-1); 1872 } 1873 } 1874 1875 /* 1876 * For the MN set, meta_init_make_device needs to 1877 * be run on all the nodes so the entries for the 1878 * softpart device name and its comp can be created 1879 * in the same order in the replica namespace. If 1880 * we have it run on mdmn_do_iocset then the mddbs 1881 * will be out of sync between master node and slave 1882 * nodes. 1883 */ 1884 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1885 1886 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1887 md_mn_msg_addmdname_t *send_params; 1888 int result; 1889 md_mn_result_t *resp = NULL; 1890 int message_size; 1891 1892 message_size = sizeof (*send_params) + 1893 strlen(wm.wm_mdname) + 1; 1894 send_params = Zalloc(message_size); 1895 send_params->addmdname_setno = sp->setno; 1896 (void) strcpy(&send_params->addmdname_name[0], 1897 wm.wm_mdname); 1898 result = mdmn_send_message(sp->setno, 1899 MD_MN_MSG_ADDMDNAME, 1900 MD_MSGF_PANIC_WHEN_INCONSISTENT, 1901 (char *)send_params, message_size, &resp, 1902 ep); 1903 Free(send_params); 1904 if (resp != NULL) { 1905 if (resp->mmr_exitval != 0) { 1906 free_result(resp); 1907 return (-1); 1908 } 1909 free_result(resp); 1910 } 1911 if (result != 0) 1912 return (-1); 1913 } else { 1914 1915 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1916 if ((key = meta_init_make_device(&sp, 1917 wm.wm_mdname, ep)) <= 0) { 1918 return (-1); 1919 } 1920 init = 1; 1921 } 1922 } 1923 1924 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1925 if (np == NULL) { 1926 if (init) { 1927 if (meta_getnmentbykey(sp->setno, MD_SIDEWILD, 1928 key, NULL, &mnum, NULL, ep) != NULL) { 1929 (void) metaioctl(MD_IOCREM_DEV, &mnum, 1930 ep, NULL); 1931 } 1932 (void) del_self_name(sp, key, ep); 1933 } 1934 return (-1); 1935 } 1936 } 1937 1938 /* insert watermark into extent list */ 1939 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1940 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1941 EXTFLG_UPDATE, compare); 1942 1943 /* if we see the end watermark, we're done */ 1944 if (wm.wm_type == EXTTYP_END) 1945 break; 1946 1947 cur_off += wm.wm_length + 1; 1948 1949 /* clear out set and name pointers for next iteration */ 1950 np = NULL; 1951 spsetp = NULL; 1952 } 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * ************************************************************************** 1959 * Print (metastat) Functions * 1960 * ************************************************************************** 1961 */ 1962 1963 /* 1964 * FUNCTION: meta_sp_short_print() 1965 * INPUT: msp - the unit structure to display 1966 * fp - the file pointer to send output to 1967 * options - print options from the command line processor 1968 * OUTPUT: ep - return error pointer 1969 * RETURNS: int - -1 if error, 0 on success 1970 * PURPOSE: display a short report of the soft partition in md.tab 1971 * form, primarily used for metastat -p. 1972 */ 1973 static int 1974 meta_sp_short_print( 1975 md_sp_t *msp, 1976 char *fname, 1977 FILE *fp, 1978 mdprtopts_t options, 1979 md_error_t *ep 1980 ) 1981 { 1982 int extn; 1983 1984 if (options & PRINT_LARGEDEVICES) { 1985 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1986 return (0); 1987 } 1988 1989 if (options & PRINT_FN) { 1990 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1991 return (0); 1992 } 1993 1994 /* print name and -p */ 1995 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1996 return (mdsyserror(ep, errno, fname)); 1997 1998 /* print the component */ 1999 /* 2000 * Always print the full path name 2001 */ 2002 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2003 return (mdsyserror(ep, errno, fname)); 2004 2005 /* print out each extent */ 2006 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2007 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2008 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2009 extp->len) == EOF) 2010 return (mdsyserror(ep, errno, fname)); 2011 } 2012 2013 if (fprintf(fp, "\n") == EOF) 2014 return (mdsyserror(ep, errno, fname)); 2015 2016 /* success */ 2017 return (0); 2018 } 2019 2020 /* 2021 * FUNCTION: meta_sp_status_to_name() 2022 * INPUT: xsp_status - the status value to convert to a string 2023 * tstate - transient errored device state. If set the 2024 * device is Unavailable 2025 * OUTPUT: none 2026 * RETURNS: char * - a pointer to the string representing the status value 2027 * PURPOSE: return an internationalized string representing the 2028 * status value for a soft partition. The strings are 2029 * strdup'd and must be freed by the caller. 2030 */ 2031 static char * 2032 meta_sp_status_to_name( 2033 xsp_status_t xsp_status, 2034 uint_t tstate 2035 ) 2036 { 2037 char *rval = NULL; 2038 2039 /* 2040 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 2041 * value for an 'Unavailable' return. tstate can be set because of 2042 * other multi-node reasons (e.g. ABR being set) 2043 */ 2044 if (tstate & MD_INACCESSIBLE) { 2045 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 2046 } 2047 2048 switch (xsp_status) { 2049 case MD_SP_CREATEPEND: 2050 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 2051 break; 2052 case MD_SP_GROWPEND: 2053 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 2054 break; 2055 case MD_SP_DELPEND: 2056 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 2057 break; 2058 case MD_SP_OK: 2059 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 2060 break; 2061 case MD_SP_ERR: 2062 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2063 break; 2064 case MD_SP_RECOVER: 2065 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2066 break; 2067 } 2068 2069 if (rval == NULL) 2070 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2071 2072 return (rval); 2073 } 2074 2075 /* 2076 * FUNCTION: meta_sp_report() 2077 * INPUT: sp - the set name for the unit being displayed 2078 * msp - the unit structure to display 2079 * nlpp - pass back the large devs 2080 * fp - the file pointer to send output to 2081 * options - print options from the command line processor 2082 * OUTPUT: ep - return error pointer 2083 * RETURNS: int - -1 if error, 0 on success 2084 * PURPOSE: print a full report of the device specified 2085 */ 2086 static int 2087 meta_sp_report( 2088 mdsetname_t *sp, 2089 md_sp_t *msp, 2090 mdnamelist_t **nlpp, 2091 char *fname, 2092 FILE *fp, 2093 mdprtopts_t options, 2094 md_error_t *ep 2095 ) 2096 { 2097 uint_t extn; 2098 char *status; 2099 char *devid = ""; 2100 mdname_t *didnp = NULL; 2101 ddi_devid_t dtp; 2102 int len; 2103 uint_t tstate = 0; 2104 2105 if (options & PRINT_LARGEDEVICES) { 2106 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) { 2107 return (0); 2108 } else { 2109 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2110 return (-1); 2111 } 2112 } 2113 2114 if (options & PRINT_FN) { 2115 if ((msp->common.revision & MD_FN_META_DEV) == 0) { 2116 return (0); 2117 } else { 2118 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2119 return (-1); 2120 } 2121 } 2122 2123 if (options & PRINT_HEADER) { 2124 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2125 msp->common.namep->cname) == EOF) 2126 return (mdsyserror(ep, errno, fname)); 2127 } 2128 2129 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2130 msp->compnamep->cname) == EOF) 2131 return (mdsyserror(ep, errno, fname)); 2132 2133 /* Determine if device is available before displaying status */ 2134 if (metaismeta(msp->common.namep)) { 2135 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2136 return (-1); 2137 } 2138 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2139 2140 /* print out "State" to be consistent with other metadevices */ 2141 if (tstate & MD_ABR_CAP) { 2142 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2143 " State: %s - Application Based Recovery (ABR)\n"), 2144 status) == EOF) { 2145 Free(status); 2146 return (mdsyserror(ep, errno, fname)); 2147 } 2148 } else { 2149 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2150 " State: %s\n"), status) == EOF) { 2151 Free(status); 2152 return (mdsyserror(ep, errno, fname)); 2153 } 2154 } 2155 free(status); 2156 2157 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2158 msp->common.size, 2159 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF) 2160 return (mdsyserror(ep, errno, fname)); 2161 2162 /* print component details */ 2163 if (! metaismeta(msp->compnamep)) { 2164 diskaddr_t start_blk; 2165 int has_mddb; 2166 char *has_mddb_str; 2167 2168 /* print header */ 2169 /* 2170 * Building a format string on the fly that will 2171 * be used in (f)printf. This allows the length 2172 * of the ctd to vary from small to large without 2173 * looking horrible. 2174 */ 2175 len = strlen(msp->compnamep->cname); 2176 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2177 len += 2; 2178 if (fprintf(fp, 2179 "\t%-*.*s %-12.12s %-5.5s %s\n", 2180 len, len, 2181 dgettext(TEXT_DOMAIN, "Device"), 2182 dgettext(TEXT_DOMAIN, "Start Block"), 2183 dgettext(TEXT_DOMAIN, "Dbase"), 2184 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2185 return (mdsyserror(ep, errno, fname)); 2186 } 2187 2188 2189 /* get info */ 2190 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2191 MD_DISKADDR_ERROR) 2192 return (-1); 2193 2194 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2195 return (-1); 2196 2197 if (has_mddb) 2198 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2199 else 2200 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2201 2202 /* populate the key in the name_p structure */ 2203 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2204 if (didnp == NULL) { 2205 return (-1); 2206 } 2207 2208 /* determine if devid does NOT exist */ 2209 if (options & PRINT_DEVID) { 2210 if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep), 2211 didnp->key, ep)) == NULL) 2212 devid = dgettext(TEXT_DOMAIN, "No "); 2213 else { 2214 devid = dgettext(TEXT_DOMAIN, "Yes"); 2215 free(dtp); 2216 } 2217 } 2218 2219 /* print info */ 2220 /* 2221 * This allows the length 2222 * of the ctd to vary from small to large without 2223 * looking horrible. 2224 */ 2225 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2226 len, msp->compnamep->cname, 2227 start_blk, has_mddb_str, devid) == EOF) { 2228 return (mdsyserror(ep, errno, fname)); 2229 } 2230 (void) fprintf(fp, "\n"); 2231 } 2232 2233 2234 /* print the headers */ 2235 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2236 dgettext(TEXT_DOMAIN, "Extent"), 2237 dgettext(TEXT_DOMAIN, "Start Block"), 2238 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2239 return (mdsyserror(ep, errno, fname)); 2240 2241 /* print out each extent */ 2242 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2243 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2244 2245 /* If PRINT_TIMES option is ever supported, add output here */ 2246 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2247 extn, extp->poff, extp->len) == EOF) 2248 return (mdsyserror(ep, errno, fname)); 2249 } 2250 2251 /* separate records with a newline */ 2252 (void) fprintf(fp, "\n"); 2253 return (0); 2254 } 2255 2256 /* 2257 * FUNCTION: meta_sp_print() 2258 * INPUT: sp - the set name for the unit being displayed 2259 * np - the name of the device to print 2260 * fname - ??? not used 2261 * fp - the file pointer to send output to 2262 * options - print options from the command line processor 2263 * OUTPUT: ep - return error pointer 2264 * RETURNS: int - -1 if error, 0 on success 2265 * PURPOSE: print a full report of the device specified by metastat. 2266 * This is the main entry point for printing. 2267 */ 2268 int 2269 meta_sp_print( 2270 mdsetname_t *sp, 2271 mdname_t *np, 2272 mdnamelist_t **nlpp, 2273 char *fname, 2274 FILE *fp, 2275 mdprtopts_t options, 2276 md_error_t *ep 2277 ) 2278 { 2279 md_sp_t *msp; 2280 md_unit_t *mdp; 2281 int rval = 0; 2282 2283 /* should always have the same set */ 2284 assert(sp != NULL); 2285 2286 /* print all the soft partitions */ 2287 if (np == NULL) { 2288 mdnamelist_t *nlp = NULL; 2289 mdnamelist_t *p; 2290 int cnt; 2291 2292 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2293 return (-1); 2294 else if (cnt == 0) 2295 return (0); 2296 2297 /* recusively print them out */ 2298 for (p = nlp; (p != NULL); p = p->next) { 2299 mdname_t *curnp = p->namep; 2300 2301 /* 2302 * one problem with the rval of -1 here is that 2303 * the error gets "lost" when the next device is 2304 * printed, but we want to print them all anyway. 2305 */ 2306 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2307 options, ep); 2308 } 2309 2310 /* clean up, return success */ 2311 metafreenamelist(nlp); 2312 return (rval); 2313 } 2314 2315 /* get the unit structure */ 2316 if ((msp = meta_get_sp_common(sp, np, 2317 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2318 return (-1); 2319 2320 /* check for parented */ 2321 if ((! (options & PRINT_SUBDEVS)) && 2322 (MD_HAS_PARENT(msp->common.parent))) { 2323 return (0); 2324 } 2325 2326 /* print appropriate detail */ 2327 if (options & PRINT_SHORT) { 2328 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2329 return (-1); 2330 } else { 2331 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2332 return (-1); 2333 } 2334 2335 /* 2336 * Print underlying metadevices if they are parented to us and 2337 * if the info for the underlying metadevice has not been printed. 2338 */ 2339 if (metaismeta(msp->compnamep)) { 2340 /* get the unit structure for the subdevice */ 2341 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2342 return (-1); 2343 2344 /* If info not already printed, recurse */ 2345 if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) { 2346 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2347 (options | PRINT_HEADER | PRINT_SUBDEVS), 2348 NULL, ep) != 0) { 2349 return (-1); 2350 } 2351 BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp))); 2352 } 2353 } 2354 return (0); 2355 } 2356 2357 /* 2358 * ************************************************************************** 2359 * Watermark Manipulation Functions * 2360 * ************************************************************************** 2361 */ 2362 2363 /* 2364 * FUNCTION: meta_sp_get_start() 2365 * INPUT: sp - the operating set 2366 * np - device upon which the sp is being built 2367 * OUTPUT: ep - return error pointer 2368 * RETURNS: daddr_t - -1 if error, otherwise the start block 2369 * PURPOSE: Encapsulate the determination of the start block of the 2370 * device upon which the sp is built or being built. 2371 * This is done to hide the ugliness of the algorithm. In 2372 * the case where a sp is being built upon a stripe of > 1 2373 * TB that is made up of a set of disks in which the first 2374 * has a VTOC label the result returned from the call to 2375 * metagetstart is incorrect. The reason being that a > 1 2376 * TB metadevice will manufacture an EFI label in which the 2377 * start address is zero. This is irrespective of the underlying 2378 * devices. The long term fix for this is to fix 2379 * meta_efi_to_mdvtoc and meta_efi_to mdgeom so that they return 2380 * values that are indicative of the first underlying device in 2381 * metadevice. 2382 */ 2383 static diskaddr_t 2384 meta_sp_get_start( 2385 mdsetname_t *sp, 2386 mdname_t *np, 2387 md_error_t *ep 2388 ) 2389 { 2390 daddr_t start_block; 2391 2392 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) { 2393 start_block += MD_SP_START; 2394 /* 2395 * In the case that the device upon which the sp is being 2396 * created is a metadevice then ensure that in the case that 2397 * the first underlying device has a vtoc label that it is 2398 * not overwritten with a watermark by setting the start block 2399 * to point just past the vtoc label 2400 */ 2401 if (start_block < VTOC_SIZE && metaismeta(np)) 2402 start_block = VTOC_SIZE; 2403 } 2404 2405 return (start_block); 2406 } 2407 2408 /* 2409 * FUNCTION: meta_sp_update_wm() 2410 * INPUT: sp - the operating set 2411 * msp - a pointer to the XDR unit structure 2412 * extlist - the extent list specifying watermarks to update 2413 * OUTPUT: ep - return error pointer 2414 * RETURNS: int - -1 if error, 0 on success 2415 * PURPOSE: steps backwards through the extent list updating 2416 * watermarks for all extents with the EXTFLG_UPDATE flag 2417 * set. Writing the watermarks guarantees consistency when 2418 * extents must be broken into pieces since the original 2419 * watermark will be the last to be updated, and will be 2420 * changed to point to a new watermark that is already 2421 * known to be consistent. If one of the writes fails, the 2422 * original watermark stays intact and none of the changes 2423 * are realized. 2424 */ 2425 static int 2426 meta_sp_update_wm( 2427 mdsetname_t *sp, 2428 md_sp_t *msp, 2429 sp_ext_node_t *extlist, 2430 md_error_t *ep 2431 ) 2432 { 2433 sp_ext_node_t *ext; 2434 sp_ext_node_t *tail; 2435 mp_watermark_t *wmp, *watermarks; 2436 xsp_offset_t *osp, *offsets; 2437 int update_count = 0; 2438 int rval = 0; 2439 md_unit_t *mdp; 2440 md_sp_update_wm_t update_params; 2441 2442 if (getenv(META_SP_DEBUG)) { 2443 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2444 meta_sp_list_dump(extlist); 2445 } 2446 2447 /* 2448 * find the last node so we can write the watermarks backwards 2449 * and count watermarks to update so we can allocate space 2450 */ 2451 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2452 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2453 update_count++; 2454 } 2455 2456 if (ext->ext_next == NULL) { 2457 tail = ext; 2458 } 2459 } 2460 ext = tail; 2461 2462 wmp = watermarks = 2463 Zalloc(update_count * sizeof (mp_watermark_t)); 2464 osp = offsets = 2465 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2466 2467 while (ext != NULL) { 2468 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2469 /* update watermark */ 2470 wmp->wm_magic = MD_SP_MAGIC; 2471 wmp->wm_version = MD_SP_VERSION; 2472 wmp->wm_type = ext->ext_type; 2473 wmp->wm_seq = ext->ext_seq; 2474 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2475 2476 /* fill in the volume name and set name */ 2477 if (ext->ext_namep != NULL) 2478 (void) strcpy(wmp->wm_mdname, 2479 ext->ext_namep->cname); 2480 else 2481 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2482 if (ext->ext_setp != NULL && 2483 ext->ext_setp->setno != MD_LOCAL_SET) 2484 (void) strcpy(wmp->wm_setname, 2485 ext->ext_setp->setname); 2486 else 2487 (void) strcpy(wmp->wm_setname, 2488 MD_SP_LOCALSETNAME); 2489 2490 /* Generate the checksum */ 2491 wmp->wm_checksum = 0; 2492 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2493 sizeof (*wmp), NULL); 2494 2495 /* record the extent offset */ 2496 *osp = ext->ext_offset; 2497 2498 /* Advance the placeholders */ 2499 osp++; wmp++; 2500 } 2501 ext = ext->ext_prev; 2502 } 2503 2504 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2505 if (mdp == NULL) { 2506 rval = -1; 2507 goto out; 2508 } 2509 2510 (void) memset(&update_params, 0, sizeof (update_params)); 2511 update_params.mnum = MD_SID(mdp); 2512 update_params.count = update_count; 2513 update_params.wmp = (uintptr_t)watermarks; 2514 update_params.osp = (uintptr_t)offsets; 2515 MD_SETDRIVERNAME(&update_params, MD_SP, 2516 MD_MIN2SET(update_params.mnum)); 2517 2518 if (metaioctl(MD_IOC_SPUPDATEWM, &update_params, 2519 &update_params.mde, msp->common.namep->cname) != 0) { 2520 (void) mdstealerror(ep, &update_params.mde); 2521 rval = -1; 2522 goto out; 2523 } 2524 2525 out: 2526 Free(watermarks); 2527 Free(offsets); 2528 2529 return (rval); 2530 } 2531 2532 /* 2533 * FUNCTION: meta_sp_clear_wm() 2534 * INPUT: sp - the operating set 2535 * msp - the unit structure for the soft partition to clear 2536 * OUTPUT: ep - return error pointer 2537 * RETURNS: int - -1 if error, 0 on success 2538 * PURPOSE: steps through the extents for a soft partition unit and 2539 * creates an extent list designed to mark all of the 2540 * watermarks for those extents as free. The extent list 2541 * is then passed to meta_sp_update_wm() to actually write 2542 * the watermarks out. 2543 */ 2544 static int 2545 meta_sp_clear_wm( 2546 mdsetname_t *sp, 2547 md_sp_t *msp, 2548 md_error_t *ep 2549 ) 2550 { 2551 sp_ext_node_t *extlist = NULL; 2552 int numexts = msp->ext.ext_len; 2553 uint_t i; 2554 int rval = 0; 2555 2556 /* for each watermark must set the flag to SP_FREE */ 2557 for (i = 0; i < numexts; i++) { 2558 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2559 2560 meta_sp_list_insert(NULL, NULL, &extlist, 2561 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2562 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2563 } 2564 2565 /* update watermarks */ 2566 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2567 2568 meta_sp_list_free(&extlist); 2569 return (rval); 2570 } 2571 2572 /* 2573 * FUNCTION: meta_sp_read_wm() 2574 * INPUT: sp - setname for component 2575 * compnp - mdname_t for component 2576 * offset - the offset of the watermark to read (sectors) 2577 * OUTPUT: wm - the watermark structure to read into 2578 * ep - return error pointer 2579 * RETURNS: int - -1 if error, 0 on success 2580 * PURPOSE: seeks out to the requested offset and reads a watermark. 2581 * It then verifies that the magic number is correct and 2582 * that the checksum is valid, returning an error if either 2583 * is wrong. 2584 */ 2585 static int 2586 meta_sp_read_wm( 2587 mdsetname_t *sp, 2588 mdname_t *compnp, 2589 mp_watermark_t *wm, 2590 sp_ext_offset_t offset, 2591 md_error_t *ep 2592 ) 2593 { 2594 md_sp_read_wm_t read_params; 2595 2596 /* 2597 * make sure block offset does not overflow 2^64 bytes and it's a 2598 * multiple of the block size. 2599 */ 2600 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2601 /* LINTED */ 2602 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2603 2604 (void) memset(wm, 0, sizeof (*wm)); 2605 2606 (void) memset(&read_params, 0, sizeof (read_params)); 2607 read_params.rdev = compnp->dev; 2608 read_params.wmp = (uintptr_t)wm; 2609 read_params.offset = offset; 2610 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2611 2612 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2613 &read_params.mde, compnp->cname) != 0) { 2614 2615 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2616 "Extent header read failed, block %llu.\n"), offset); 2617 return (mdstealerror(ep, &read_params.mde)); 2618 } 2619 2620 /* make sure magic number is correct */ 2621 if (wm->wm_magic != MD_SP_MAGIC) { 2622 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2623 "found incorrect magic number %x, expected %x.\n"), 2624 wm->wm_magic, MD_SP_MAGIC); 2625 /* 2626 * Pass NULL for the device name as we don't have 2627 * valid watermark contents. 2628 */ 2629 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2630 } 2631 2632 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2633 sizeof (*wm), NULL)) { 2634 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2635 "found incorrect checksum %x.\n"), 2636 wm->wm_checksum); 2637 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2638 } 2639 2640 return (0); 2641 } 2642 2643 /* 2644 * ************************************************************************** 2645 * Query Functions 2646 * ************************************************************************** 2647 */ 2648 2649 /* 2650 * IMPORTANT NOTE: This is a static function that assumes that 2651 * its input parameters have been checked and 2652 * have valid values that lie within acceptable 2653 * ranges. 2654 * 2655 * FUNCTION: meta_sp_enough_space() 2656 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2657 * must be > 0 2658 * desired_sp_size - the desired soft partition size in blocks; 2659 * must be > 0 2660 * extent_listpp - a reference to a reference to an extent 2661 * list that lists the extents on a device; 2662 * must be a reference to a reference to a 2663 * valid extent list 2664 * alignment - the desired data space alignment for the sp's 2665 * OUTPUT: boolean_t return value 2666 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2667 * list to create the desired soft partitions, 2668 * B_FALSE if there's not enough space 2669 * PURPOSE: determines whether there's enough free space in an extent 2670 * list to allow creation of a set of soft partitions 2671 */ 2672 static boolean_t 2673 meta_sp_enough_space( 2674 int desired_number_of_sps, 2675 blkcnt_t desired_sp_size, 2676 sp_ext_node_t **extent_listpp, 2677 sp_ext_length_t alignment 2678 ) 2679 { 2680 boolean_t enough_space; 2681 int number_of_sps; 2682 int number_of_extents_used; 2683 sp_ext_length_t desired_ext_length = desired_sp_size; 2684 2685 enough_space = B_TRUE; 2686 number_of_sps = 0; 2687 while ((enough_space == B_TRUE) && 2688 (number_of_sps < desired_number_of_sps)) { 2689 /* 2690 * Use the extent allocation algorithm implemented by 2691 * meta_sp_alloc_by_len() to test whether the free 2692 * extents in the extent list referenced by *extent_listpp 2693 * contain enough space to accomodate a soft partition 2694 * of size desired_ext_length. 2695 * 2696 * Repeat the test <desired_number_of_sps> times 2697 * or until it fails, whichever comes first, 2698 * each time allocating the extents required to 2699 * create the soft partition without actually 2700 * creating the soft partition. 2701 */ 2702 number_of_extents_used = meta_sp_alloc_by_len( 2703 TEST_SETNAMEP, 2704 TEST_SOFT_PARTITION_NAMEP, 2705 extent_listpp, 2706 &desired_ext_length, 2707 NO_OFFSET, 2708 alignment); 2709 if (number_of_extents_used == -1) { 2710 enough_space = B_FALSE; 2711 } else { 2712 number_of_sps++; 2713 } 2714 } 2715 return (enough_space); 2716 } 2717 2718 /* 2719 * IMPORTANT NOTE: This is a static function that calls other functions 2720 * that check its mdsetnamep and device_mdnamep 2721 * input parameters, but expects extent_listpp to 2722 * be a initialized to a valid address to which 2723 * it can write a reference to the extent list that 2724 * it creates. 2725 * 2726 * FUNCTION: meta_sp_get_extent_list() 2727 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2728 * for the set containing the device for 2729 * which the extents are to be listed 2730 * device_mdnamep - a reference to the mdname_t structure 2731 * for the device for which the extents 2732 * are to be listed 2733 * OUTPUT: *extent_listpp - a reference to the extent list for 2734 * the device; NULL if the function fails 2735 * *ep - the libmeta error encountered, if any 2736 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2737 * B_FALSE if not 2738 * PURPOSE: gets the extent list for a device 2739 */ 2740 static boolean_t 2741 meta_sp_get_extent_list( 2742 mdsetname_t *mdsetnamep, 2743 mdname_t *device_mdnamep, 2744 sp_ext_node_t **extent_listpp, 2745 md_error_t *ep 2746 ) 2747 { 2748 diskaddr_t device_size_in_blocks; 2749 mdnamelist_t *sp_name_listp; 2750 diskaddr_t start_block_address_in_blocks; 2751 2752 *extent_listpp = NULL; 2753 sp_name_listp = NULL; 2754 2755 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2756 device_mdnamep, 2757 ep); 2758 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2759 if (getenv(META_SP_DEBUG)) { 2760 mde_perror(ep, "meta_sp_get_extent_list:meta_sp_get_start"); 2761 } 2762 return (B_FALSE); 2763 } 2764 2765 device_size_in_blocks = metagetsize(device_mdnamep, ep); 2766 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2767 if (getenv(META_SP_DEBUG)) { 2768 mde_perror(ep, 2769 "meta_sp_get_extent_list:metagetsize"); 2770 } 2771 return (B_FALSE); 2772 } 2773 2774 /* 2775 * Sanity check: the start block will have skipped an integer 2776 * number of cylinders, C. C will usually be zero. If (C > 0), 2777 * and the disk slice happens to only be C cylinders in total 2778 * size, we'll fail this check. 2779 */ 2780 if (device_size_in_blocks <= 2781 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2782 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2783 return (B_FALSE); 2784 } 2785 2786 /* 2787 * After this point, we will have allocated resources, so any 2788 * failure returns must be through the supplied "fail" label 2789 * to properly deallocate things. 2790 */ 2791 2792 /* 2793 * Create an empty extent list that starts one watermark past 2794 * the start block of the device and ends one watermark before 2795 * the end of the device. 2796 */ 2797 meta_sp_list_insert(TEST_SETNAMEP, 2798 TEST_SOFT_PARTITION_NAMEP, 2799 extent_listpp, 2800 NO_OFFSET, 2801 (sp_ext_length_t)start_block_address_in_blocks, 2802 EXTTYP_RESERVED, 2803 NO_SEQUENCE_NUMBER, 2804 NO_FLAGS, 2805 meta_sp_cmp_by_offset); 2806 meta_sp_list_insert(TEST_SETNAMEP, 2807 TEST_SOFT_PARTITION_NAMEP, 2808 extent_listpp, 2809 (sp_ext_offset_t)(device_size_in_blocks - 2810 MD_SP_WMSIZE), 2811 MD_SP_WMSIZE, 2812 EXTTYP_END, 2813 NO_SEQUENCE_NUMBER, 2814 NO_FLAGS, 2815 meta_sp_cmp_by_offset); 2816 2817 /* 2818 * Get the list of soft partitions that are already on the 2819 * device. 2820 */ 2821 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2822 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2823 if (getenv(META_SP_DEBUG)) { 2824 mde_perror(ep, 2825 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2826 } 2827 goto fail; 2828 } 2829 2830 if (sp_name_listp != NULL) { 2831 /* 2832 * If there are soft partitions on the device, add the 2833 * extents used in them to the extent list. 2834 */ 2835 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2836 extent_listpp, ep) == -1) { 2837 if (getenv(META_SP_DEBUG)) { 2838 mde_perror(ep, "meta_sp_get_extent_list:" 2839 "meta_sp_extlist_from_namelist"); 2840 } 2841 goto fail; 2842 } 2843 metafreenamelist(sp_name_listp); 2844 } 2845 2846 /* 2847 * Add free extents to the extent list to represent 2848 * the remaining regions of free space on the 2849 * device. 2850 */ 2851 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2852 return (B_TRUE); 2853 2854 fail: 2855 if (sp_name_listp != NULL) { 2856 metafreenamelist(sp_name_listp); 2857 } 2858 2859 if (*extent_listpp != NULL) { 2860 /* 2861 * meta_sp_list_free sets *extent_listpp to NULL. 2862 */ 2863 meta_sp_list_free(extent_listpp); 2864 } 2865 return (B_FALSE); 2866 } 2867 2868 /* 2869 * IMPORTANT NOTE: This is a static function that calls other functions 2870 * that check its mdsetnamep and mddrivenamep 2871 * input parameters, but expects extent_listpp to 2872 * be a initialized to a valid address to which 2873 * it can write a reference to the extent list that 2874 * it creates. 2875 * 2876 * FUNCTION: meta_sp_get_extent_list_for_drive() 2877 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2878 * for the set containing the drive for 2879 * which the extents are to be listed 2880 * mddrivenamep - a reference to the mddrivename_t structure 2881 * for the drive for which the extents 2882 * are to be listed 2883 * OUTPUT: *extent_listpp - a reference to the extent list for 2884 * the drive; NULL if the function fails 2885 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2886 * B_FALSE if not 2887 * PURPOSE: gets the extent list for a drive when the entire drive 2888 * is to be soft partitioned 2889 */ 2890 static boolean_t 2891 meta_sp_get_extent_list_for_drive( 2892 mdsetname_t *mdsetnamep, 2893 mddrivename_t *mddrivenamep, 2894 sp_ext_node_t **extent_listpp 2895 ) 2896 { 2897 boolean_t can_use; 2898 diskaddr_t free_space; 2899 md_error_t mderror; 2900 mdvtoc_t proposed_vtoc; 2901 int repartition_options; 2902 int return_value; 2903 md_sp_t test_sp_struct; 2904 2905 can_use = B_TRUE; 2906 *extent_listpp = NULL; 2907 mderror = mdnullerror; 2908 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2909 &mderror); 2910 if (test_sp_struct.compnamep == NULL) { 2911 can_use = B_FALSE; 2912 } 2913 2914 if (can_use == B_TRUE) { 2915 mderror = mdnullerror; 2916 repartition_options = 0; 2917 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2918 MDCMD_USE_WHOLE_DISK, &repartition_options, 2919 &mderror); 2920 if (return_value != 0) { 2921 can_use = B_FALSE; 2922 } 2923 } 2924 2925 if (can_use == B_TRUE) { 2926 mderror = mdnullerror; 2927 repartition_options = repartition_options | 2928 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2929 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2930 repartition_options, &proposed_vtoc, &mderror); 2931 if (return_value != 0) { 2932 can_use = B_FALSE; 2933 } 2934 } 2935 2936 if (can_use == B_TRUE) { 2937 free_space = proposed_vtoc.parts[MD_SLICE0].size; 2938 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2939 can_use = B_FALSE; 2940 } 2941 } 2942 2943 if (can_use == B_TRUE) { 2944 /* 2945 * Create an extent list that starts with 2946 * a reserved extent that ends at the start 2947 * of the usable space on slice zero of the 2948 * proposed VTOC, ends with an extent that 2949 * reserves space for a watermark at the end 2950 * of slice zero, and contains a single free 2951 * extent that occupies the rest of the space 2952 * on the slice. 2953 * 2954 * NOTE: 2955 * 2956 * Don't use metagetstart() or metagetsize() to 2957 * find the usable space. They query the mdname_t 2958 * structure that represents an actual device to 2959 * determine the amount of space on the device that 2960 * contains metadata and the total amount of space 2961 * on the device. Since this function creates a 2962 * proposed extent list that doesn't reflect the 2963 * state of an actual device, there's no mdname_t 2964 * structure to be queried. 2965 * 2966 * When a drive is reformatted to prepare for 2967 * soft partitioning, all of slice seven is 2968 * reserved for metadata, all of slice zero is 2969 * available for soft partitioning, and all other 2970 * slices on the drive are empty. The proposed 2971 * extent list for the drive therefore contains 2972 * only three extents: a reserved extent that ends 2973 * at the start of the usable space on slice zero, 2974 * a single free extent that occupies all the usable 2975 * space on slice zero, and an ending extent that 2976 * reserves space for a watermark at the end of 2977 * slice zero. 2978 */ 2979 meta_sp_list_insert(TEST_SETNAMEP, 2980 TEST_SOFT_PARTITION_NAMEP, 2981 extent_listpp, 2982 NO_OFFSET, 2983 (sp_ext_length_t)(MD_SP_START), 2984 EXTTYP_RESERVED, 2985 NO_SEQUENCE_NUMBER, 2986 NO_FLAGS, 2987 meta_sp_cmp_by_offset); 2988 meta_sp_list_insert(TEST_SETNAMEP, 2989 TEST_SOFT_PARTITION_NAMEP, 2990 extent_listpp, 2991 (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2992 MD_SP_WMSIZE, 2993 EXTTYP_END, 2994 NO_SEQUENCE_NUMBER, 2995 NO_FLAGS, 2996 meta_sp_cmp_by_offset); 2997 meta_sp_list_freefill(extent_listpp, free_space); 2998 } 2999 return (can_use); 3000 } 3001 3002 /* 3003 * FUNCTION: meta_sp_can_create_sps() 3004 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3005 * for the set containing the device for 3006 * which the extents are to be listed 3007 * mdnamep - a reference to the mdname_t of the device 3008 * on which the soft parititions are to be created 3009 * number_of_sps - the desired number of soft partitions 3010 * sp_size - the desired soft partition size 3011 * OUTPUT: boolean_t return value 3012 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3013 * B_FALSE if not 3014 * PURPOSE: determines whether a set of soft partitions can be created 3015 * on a device 3016 */ 3017 boolean_t 3018 meta_sp_can_create_sps( 3019 mdsetname_t *mdsetnamep, 3020 mdname_t *mdnamep, 3021 int number_of_sps, 3022 blkcnt_t sp_size 3023 ) 3024 { 3025 sp_ext_node_t *extent_listp; 3026 boolean_t succeeded; 3027 md_error_t mde; 3028 3029 if ((number_of_sps > 0) && (sp_size > 0)) { 3030 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3031 &extent_listp, &mde); 3032 } else { 3033 succeeded = B_FALSE; 3034 } 3035 3036 /* 3037 * We don't really care about an error return from the 3038 * alignment call; that will just result in passing zero, 3039 * which will be interpreted as no alignment. 3040 */ 3041 3042 if (succeeded == B_TRUE) { 3043 succeeded = meta_sp_enough_space(number_of_sps, 3044 sp_size, &extent_listp, 3045 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3046 meta_sp_list_free(&extent_listp); 3047 } 3048 return (succeeded); 3049 } 3050 3051 /* 3052 * FUNCTION: meta_sp_can_create_sps_on_drive() 3053 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3054 * for the set containing the drive for 3055 * which the extents are to be listed 3056 * mddrivenamep - a reference to the mddrivename_t of the drive 3057 * on which the soft parititions are to be created 3058 * number_of_sps - the desired number of soft partitions 3059 * sp_size - the desired soft partition size 3060 * OUTPUT: boolean_t return value 3061 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3062 * B_FALSE if not 3063 * PURPOSE: determines whether a set of soft partitions can be created 3064 * on a drive if the entire drive is soft partitioned 3065 */ 3066 boolean_t 3067 meta_sp_can_create_sps_on_drive( 3068 mdsetname_t *mdsetnamep, 3069 mddrivename_t *mddrivenamep, 3070 int number_of_sps, 3071 blkcnt_t sp_size 3072 ) 3073 { 3074 sp_ext_node_t *extent_listp; 3075 boolean_t succeeded; 3076 3077 if ((number_of_sps > 0) && (sp_size > 0)) { 3078 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3079 mddrivenamep, 3080 &extent_listp); 3081 } else { 3082 succeeded = B_FALSE; 3083 } 3084 3085 /* 3086 * We don't care about alignment on the space call because 3087 * we're specifically dealing with a drive, which will have no 3088 * inherent alignment. 3089 */ 3090 3091 if (succeeded == B_TRUE) { 3092 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3093 &extent_listp, SP_UNALIGNED); 3094 meta_sp_list_free(&extent_listp); 3095 } 3096 return (succeeded); 3097 } 3098 3099 /* 3100 * FUNCTION: meta_sp_get_free_space() 3101 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3102 * for the set containing the device for 3103 * which the free space is to be returned 3104 * mdnamep - a reference to the mdname_t of the device 3105 * for which the free space is to be returned 3106 * OUTPUT: blkcnt_t return value 3107 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3108 * PURPOSE: returns the number of blocks of free space on a device 3109 */ 3110 blkcnt_t 3111 meta_sp_get_free_space( 3112 mdsetname_t *mdsetnamep, 3113 mdname_t *mdnamep 3114 ) 3115 { 3116 sp_ext_node_t *extent_listp; 3117 sp_ext_length_t free_blocks; 3118 boolean_t succeeded; 3119 md_error_t mde; 3120 3121 extent_listp = NULL; 3122 free_blocks = 0; 3123 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3124 &extent_listp, &mde); 3125 if (succeeded == B_TRUE) { 3126 free_blocks = meta_sp_list_size(extent_listp, 3127 EXTTYP_FREE, INCLUDE_WM); 3128 meta_sp_list_free(&extent_listp); 3129 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3130 /* 3131 * Subtract a safety margin for watermarks when 3132 * computing the number of blocks available for 3133 * use. The actual number of watermarks can't 3134 * be calculated without knowing the exact numbers 3135 * and sizes of both the free extents and the soft 3136 * partitions to be created. The calculation is 3137 * highly complex and error-prone even if those 3138 * quantities are known. The approximate value 3139 * 10 * MD_SP_WMSIZE is within a few blocks of the 3140 * correct value in all practical cases. 3141 */ 3142 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3143 } else { 3144 free_blocks = 0; 3145 } 3146 } else { 3147 mdclrerror(&mde); 3148 } 3149 3150 return (free_blocks); 3151 } 3152 3153 /* 3154 * FUNCTION: meta_sp_get_free_space_on_drive() 3155 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3156 * for the set containing the drive for 3157 * which the free space is to be returned 3158 * mddrivenamep - a reference to the mddrivename_t of the drive 3159 * for which the free space is to be returned 3160 * OUTPUT: blkcnt_t return value 3161 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3162 * PURPOSE: returns the number of blocks of space usable for soft 3163 * partitions on an entire drive, if the entire drive is 3164 * soft partitioned 3165 */ 3166 blkcnt_t 3167 meta_sp_get_free_space_on_drive( 3168 mdsetname_t *mdsetnamep, 3169 mddrivename_t *mddrivenamep 3170 ) 3171 { 3172 sp_ext_node_t *extent_listp; 3173 sp_ext_length_t free_blocks; 3174 boolean_t succeeded; 3175 3176 extent_listp = NULL; 3177 free_blocks = 0; 3178 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3179 mddrivenamep, &extent_listp); 3180 if (succeeded == B_TRUE) { 3181 free_blocks = meta_sp_list_size(extent_listp, 3182 EXTTYP_FREE, INCLUDE_WM); 3183 meta_sp_list_free(&extent_listp); 3184 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3185 /* 3186 * Subtract a safety margin for watermarks when 3187 * computing the number of blocks available for 3188 * use. The actual number of watermarks can't 3189 * be calculated without knowing the exact numbers 3190 * and sizes of both the free extents and the soft 3191 * partitions to be created. The calculation is 3192 * highly complex and error-prone even if those 3193 * quantities are known. The approximate value 3194 * 10 * MD_SP_WMSIZE is within a few blocks of the 3195 * correct value in all practical cases. 3196 */ 3197 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3198 } else { 3199 free_blocks = 0; 3200 } 3201 } 3202 return (free_blocks); 3203 } 3204 3205 /* 3206 * FUNCTION: meta_sp_get_number_of_possible_sps() 3207 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3208 * for the set containing the device for 3209 * which the number of possible soft partitions 3210 * is to be returned 3211 * mdnamep - a reference to the mdname_t of the device 3212 * for which the number of possible soft partitions 3213 * is to be returned 3214 * OUTPUT: int return value 3215 * RETURNS: int - the number of soft partitions of the desired size 3216 * that can be created on the device 3217 * PURPOSE: returns the number of soft partitions of a given size 3218 * that can be created on a device 3219 */ 3220 int 3221 meta_sp_get_number_of_possible_sps( 3222 mdsetname_t *mdsetnamep, 3223 mdname_t *mdnamep, 3224 blkcnt_t sp_size 3225 ) 3226 { 3227 sp_ext_node_t *extent_listp; 3228 int number_of_possible_sps; 3229 boolean_t succeeded; 3230 md_error_t mde; 3231 sp_ext_length_t alignment; 3232 3233 extent_listp = NULL; 3234 number_of_possible_sps = 0; 3235 if (sp_size > 0) { 3236 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3237 mdnamep, &extent_listp, &mde)) == B_FALSE) 3238 mdclrerror(&mde); 3239 } else { 3240 succeeded = B_FALSE; 3241 } 3242 3243 if (succeeded == B_TRUE) { 3244 alignment = meta_sp_get_default_alignment(mdsetnamep, 3245 mdnamep, &mde); 3246 } 3247 3248 while (succeeded == B_TRUE) { 3249 /* 3250 * Keep allocating space from the extent list 3251 * for soft partitions of the desired size until 3252 * there's not enough free space left in the list 3253 * for another soft partiition of that size. 3254 * Add one to the number of possible soft partitions 3255 * for each soft partition for which there is 3256 * enough free space left. 3257 */ 3258 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3259 sp_size, &extent_listp, alignment); 3260 if (succeeded == B_TRUE) { 3261 number_of_possible_sps++; 3262 } 3263 } 3264 if (extent_listp != NULL) { 3265 meta_sp_list_free(&extent_listp); 3266 } 3267 return (number_of_possible_sps); 3268 } 3269 3270 /* 3271 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3272 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3273 * for the set containing the drive for 3274 * which the number of possible soft partitions 3275 * is to be returned 3276 * mddrivenamep - a reference to the mddrivename_t of the drive 3277 * for which the number of possible soft partitions 3278 * is to be returned 3279 * sp_size - the size in blocks of the proposed soft partitions 3280 * OUTPUT: int return value 3281 * RETURNS: int - the number of soft partitions of the desired size 3282 * that can be created on the drive 3283 * PURPOSE: returns the number of soft partitions of a given size 3284 * that can be created on a drive, if the entire drive is 3285 * soft partitioned 3286 */ 3287 int 3288 meta_sp_get_number_of_possible_sps_on_drive( 3289 mdsetname_t *mdsetnamep, 3290 mddrivename_t *mddrivenamep, 3291 blkcnt_t sp_size 3292 ) 3293 { 3294 sp_ext_node_t *extent_listp; 3295 int number_of_possible_sps; 3296 boolean_t succeeded; 3297 3298 extent_listp = NULL; 3299 number_of_possible_sps = 0; 3300 if (sp_size > 0) { 3301 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3302 mddrivenamep, &extent_listp); 3303 } else { 3304 succeeded = B_FALSE; 3305 } 3306 while (succeeded == B_TRUE) { 3307 /* 3308 * Keep allocating space from the extent list 3309 * for soft partitions of the desired size until 3310 * there's not enough free space left in the list 3311 * for another soft partition of that size. 3312 * Add one to the number of possible soft partitions 3313 * for each soft partition for which there is 3314 * enough free space left. 3315 * 3316 * Since it's a drive, not a metadevice, make no 3317 * assumptions about alignment. 3318 */ 3319 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3320 sp_size, &extent_listp, SP_UNALIGNED); 3321 if (succeeded == B_TRUE) { 3322 number_of_possible_sps++; 3323 } 3324 } 3325 if (extent_listp != NULL) { 3326 meta_sp_list_free(&extent_listp); 3327 } 3328 return (number_of_possible_sps); 3329 } 3330 3331 /* 3332 * FUNCTION: meta_sp_get_possible_sp_size() 3333 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3334 * for the set containing the device for 3335 * which the possible soft partition size 3336 * is to be returned 3337 * mdnamep - a reference to the mdname_t of the device 3338 * for which the possible soft partition size 3339 * is to be returned 3340 * number_of_sps - the desired number of soft partitions 3341 * OUTPUT: blkcnt_t return value 3342 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3343 * PURPOSE: returns the maximum possible size of each of a given number of 3344 * soft partitions of equal size that can be created on a device 3345 */ 3346 blkcnt_t 3347 meta_sp_get_possible_sp_size( 3348 mdsetname_t *mdsetnamep, 3349 mdname_t *mdnamep, 3350 int number_of_sps 3351 ) 3352 { 3353 blkcnt_t free_blocks; 3354 blkcnt_t sp_size; 3355 boolean_t succeeded; 3356 3357 sp_size = 0; 3358 if (number_of_sps > 0) { 3359 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3360 sp_size = free_blocks / number_of_sps; 3361 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3362 number_of_sps, sp_size); 3363 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3364 /* 3365 * To compensate for space that may have been 3366 * occupied by watermarks, reduce sp_size by a 3367 * number of blocks equal to the number of soft 3368 * partitions desired, and test again to see 3369 * whether the desired number of soft partitions 3370 * can be created. 3371 */ 3372 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3373 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3374 number_of_sps, sp_size); 3375 } 3376 if (sp_size < 0) { 3377 sp_size = 0; 3378 } 3379 } 3380 return (sp_size); 3381 } 3382 3383 /* 3384 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3385 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3386 * for the set containing the drive for 3387 * which the possible soft partition size 3388 * is to be returned 3389 * mddrivenamep - a reference to the mddrivename_t of the drive 3390 * for which the possible soft partition size 3391 * is to be returned 3392 * number_of_sps - the desired number of soft partitions 3393 * OUTPUT: blkcnt_t return value 3394 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3395 * PURPOSE: returns the maximum possible size of each of a given number of 3396 * soft partitions of equal size that can be created on a drive 3397 * if the entire drive is soft partitioned 3398 */ 3399 blkcnt_t 3400 meta_sp_get_possible_sp_size_on_drive( 3401 mdsetname_t *mdsetnamep, 3402 mddrivename_t *mddrivenamep, 3403 int number_of_sps 3404 ) 3405 { 3406 blkcnt_t free_blocks; 3407 blkcnt_t sp_size; 3408 boolean_t succeeded; 3409 3410 sp_size = 0; 3411 if (number_of_sps > 0) { 3412 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3413 mddrivenamep); 3414 sp_size = free_blocks / number_of_sps; 3415 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3416 mddrivenamep, 3417 number_of_sps, sp_size); 3418 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3419 /* 3420 * To compensate for space that may have been 3421 * occupied by watermarks, reduce sp_size by a 3422 * number of blocks equal to the number of soft 3423 * partitions desired, and test again to see 3424 * whether the desired number of soft partitions 3425 * can be created. 3426 */ 3427 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3428 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3429 mddrivenamep, 3430 number_of_sps, sp_size); 3431 } 3432 if (sp_size < 0) { 3433 sp_size = 0; 3434 } 3435 } 3436 return (sp_size); 3437 } 3438 3439 /* 3440 * ************************************************************************** 3441 * Unit Structure Manipulation Functions * 3442 * ************************************************************************** 3443 */ 3444 3445 /* 3446 * FUNCTION: meta_sp_fillextarray() 3447 * INPUT: mp - the unit structure to fill 3448 * extlist - the list of extents to fill with 3449 * OUTPUT: none 3450 * RETURNS: void 3451 * PURPOSE: fills in the unit structure extent list with the extents 3452 * specified by extlist. Only extents in extlist with the 3453 * EXTFLG_UPDATE flag are changed in the unit structure, 3454 * and the index into the unit structure is the sequence 3455 * number in the extent list. After all of the nodes have 3456 * been updated the virtual offsets in the unit structure 3457 * are updated to reflect the new lengths. 3458 */ 3459 static void 3460 meta_sp_fillextarray( 3461 mp_unit_t *mp, 3462 sp_ext_node_t *extlist 3463 ) 3464 { 3465 int i; 3466 sp_ext_node_t *ext; 3467 sp_ext_offset_t curvoff = 0LL; 3468 3469 assert(mp != NULL); 3470 3471 /* go through the allocation list and fill in our unit structure */ 3472 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3473 if ((ext->ext_type == EXTTYP_ALLOC) && 3474 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3475 mp->un_ext[ext->ext_seq].un_poff = 3476 ext->ext_offset + MD_SP_WMSIZE; 3477 mp->un_ext[ext->ext_seq].un_len = 3478 ext->ext_length - MD_SP_WMSIZE; 3479 } 3480 } 3481 3482 for (i = 0; i < mp->un_numexts; i++) { 3483 assert(mp->un_ext[i].un_poff != 0); 3484 assert(mp->un_ext[i].un_len != 0); 3485 mp->un_ext[i].un_voff = curvoff; 3486 curvoff += mp->un_ext[i].un_len; 3487 } 3488 } 3489 3490 /* 3491 * FUNCTION: meta_sp_createunit() 3492 * INPUT: np - the name of the device to create a unit structure for 3493 * compnp - the name of the device the soft partition is on 3494 * extlist - the extent list to populate the new unit with 3495 * numexts - the number of extents in the extent list 3496 * len - the total size of the soft partition (sectors) 3497 * status - the initial status of the unit structure 3498 * OUTPUT: ep - return error pointer 3499 * RETURNS: mp_unit_t * - the new unit structure. 3500 * PURPOSE: allocates and fills in a new soft partition unit 3501 * structure to be passed to the soft partitioning driver 3502 * for creation. 3503 */ 3504 static mp_unit_t * 3505 meta_sp_createunit( 3506 mdname_t *np, 3507 mdname_t *compnp, 3508 sp_ext_node_t *extlist, 3509 int numexts, 3510 sp_ext_length_t len, 3511 sp_status_t status, 3512 md_error_t *ep 3513 ) 3514 { 3515 mp_unit_t *mp; 3516 uint_t ms_size; 3517 3518 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3519 (numexts * sizeof (mp->un_ext[0])); 3520 3521 mp = Zalloc(ms_size); 3522 3523 /* fill in fields in common unit structure */ 3524 mp->c.un_type = MD_METASP; 3525 mp->c.un_size = ms_size; 3526 MD_SID(mp) = meta_getminor(np->dev); 3527 mp->c.un_total_blocks = len; 3528 mp->c.un_actual_tb = len; 3529 3530 /* set up geometry */ 3531 (void) meta_sp_setgeom(np, compnp, mp, ep); 3532 3533 /* if we're building on metadevice we can't parent */ 3534 if (metaismeta(compnp)) 3535 MD_CAPAB(mp) = MD_CANT_PARENT; 3536 else 3537 MD_CAPAB(mp) = MD_CAN_PARENT; 3538 3539 /* fill soft partition-specific fields */ 3540 mp->un_dev = compnp->dev; 3541 mp->un_key = compnp->key; 3542 3543 /* mdname_t start_blk field is not 64-bit! */ 3544 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3545 mp->un_status = status; 3546 mp->un_numexts = numexts; 3547 mp->un_length = len; 3548 3549 /* fill in the extent array */ 3550 meta_sp_fillextarray(mp, extlist); 3551 3552 return (mp); 3553 } 3554 3555 /* 3556 * FUNCTION: meta_sp_updateunit() 3557 * INPUT: np - name structure for the metadevice being updated 3558 * old_un - the original unit structure that is being updated 3559 * extlist - the extent list to populate the new unit with 3560 * grow_len - the amount by which the partition is being grown 3561 * numexts - the number of extents in the extent list 3562 * ep - return error pointer 3563 * OUTPUT: none 3564 * RETURNS: mp_unit_t * - the updated unit structure 3565 * PURPOSE: allocates and fills in a new soft partition unit structure to 3566 * be passed to the soft partitioning driver for creation. The 3567 * old unit structure is first copied in, and then the updated 3568 * extents are changed in the new unit structure. This is 3569 * typically used when the size of an existing unit is changed. 3570 */ 3571 static mp_unit_t * 3572 meta_sp_updateunit( 3573 mdname_t *np, 3574 mp_unit_t *old_un, 3575 sp_ext_node_t *extlist, 3576 sp_ext_length_t grow_len, 3577 int numexts, 3578 md_error_t *ep 3579 ) 3580 { 3581 mp_unit_t *new_un; 3582 sp_ext_length_t new_len; 3583 uint_t new_size; 3584 3585 assert(old_un != NULL); 3586 assert(extlist != NULL); 3587 3588 /* allocate new unit structure and copy in old unit */ 3589 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3590 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3591 new_len = old_un->un_length + grow_len; 3592 new_un = Zalloc(new_size); 3593 bcopy(old_un, new_un, old_un->c.un_size); 3594 3595 /* update size and geometry information */ 3596 new_un->c.un_size = new_size; 3597 new_un->un_length = new_len; 3598 new_un->c.un_total_blocks = new_len; 3599 new_un->c.un_actual_tb = new_len; 3600 if (meta_adjust_geom((md_unit_t *)new_un, np, 3601 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3602 0, ep) != 0) { 3603 Free(new_un); 3604 return (NULL); 3605 } 3606 3607 /* update extent information */ 3608 new_un->un_numexts += numexts; 3609 3610 meta_sp_fillextarray(new_un, extlist); 3611 3612 return (new_un); 3613 } 3614 3615 /* 3616 * FUNCTION: meta_get_sp() 3617 * INPUT: sp - the set name for the device to get 3618 * np - the name of the device to get 3619 * OUTPUT: ep - return error pointer 3620 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3621 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3622 * for the named device. Just a wrapper for meta_get_sp_common(). 3623 */ 3624 md_sp_t * 3625 meta_get_sp( 3626 mdsetname_t *sp, 3627 mdname_t *np, 3628 md_error_t *ep 3629 ) 3630 { 3631 return (meta_get_sp_common(sp, np, 0, ep)); 3632 } 3633 3634 /* 3635 * FUNCTION: meta_get_sp_common() 3636 * INPUT: sp - the set name for the device to get 3637 * np - the name of the device to get 3638 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3639 * OUTPUT: ep - return error pointer 3640 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3641 * NULL if np is not a soft partition 3642 * PURPOSE: common routine for fetching a soft partition unit structure 3643 */ 3644 md_sp_t * 3645 meta_get_sp_common( 3646 mdsetname_t *sp, 3647 mdname_t *np, 3648 int fast, 3649 md_error_t *ep 3650 ) 3651 { 3652 mddrivename_t *dnp = np->drivenamep; 3653 char *miscname; 3654 mp_unit_t *mp; 3655 md_sp_t *msp; 3656 int i; 3657 3658 /* must have set */ 3659 assert(sp != NULL); 3660 3661 /* short circuit */ 3662 if (dnp->unitp != NULL) { 3663 if (dnp->unitp->type != MD_METASP) 3664 return (NULL); 3665 return ((md_sp_t *)dnp->unitp); 3666 } 3667 /* get miscname and unit */ 3668 if ((miscname = metagetmiscname(np, ep)) == NULL) 3669 return (NULL); 3670 3671 if (strcmp(miscname, MD_SP) != 0) { 3672 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3673 return (NULL); 3674 } 3675 3676 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3677 return (NULL); 3678 3679 assert(mp->c.un_type == MD_METASP); 3680 3681 /* allocate soft partition */ 3682 msp = Zalloc(sizeof (*msp)); 3683 3684 /* get the common information */ 3685 msp->common.namep = np; 3686 msp->common.type = mp->c.un_type; 3687 msp->common.state = mp->c.un_status; 3688 msp->common.capabilities = mp->c.un_capabilities; 3689 msp->common.parent = mp->c.un_parent; 3690 msp->common.size = mp->c.un_total_blocks; 3691 msp->common.user_flags = mp->c.un_user_flags; 3692 msp->common.revision = mp->c.un_revision; 3693 3694 /* get soft partition information */ 3695 if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3696 goto out; 3697 3698 /* 3699 * Fill in the key and the start block. Note that the start 3700 * block in the unit structure is 64 bits but the name pointer 3701 * only supports 32 bits. 3702 */ 3703 msp->compnamep->key = mp->un_key; 3704 msp->compnamep->start_blk = mp->un_start_blk; 3705 3706 /* fill in status field */ 3707 msp->status = mp->un_status; 3708 3709 /* allocate the extents */ 3710 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3711 msp->ext.ext_len = mp->un_numexts; 3712 3713 /* do the extents for this soft partition */ 3714 for (i = 0; i < mp->un_numexts; i++) { 3715 struct mp_ext *mde = &mp->un_ext[i]; 3716 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3717 3718 extp->voff = mde->un_voff; 3719 extp->poff = mde->un_poff; 3720 extp->len = mde->un_len; 3721 } 3722 3723 /* cleanup, return success */ 3724 Free(mp); 3725 dnp->unitp = (md_common_t *)msp; 3726 return (msp); 3727 3728 out: 3729 /* clean up and return error */ 3730 Free(mp); 3731 Free(msp); 3732 return (NULL); 3733 } 3734 3735 3736 /* 3737 * FUNCTION: meta_init_sp() 3738 * INPUT: spp - the set name for the new device 3739 * argc - the remaining argument count for the metainit cmdline 3740 * argv - the remainder of the unparsed command line 3741 * options - global options parsed by metainit 3742 * OUTPUT: ep - return error pointer 3743 * RETURNS: int - -1 failure, 0 success 3744 * PURPOSE: provides the command line parsing and name management overhead 3745 * for creating a new soft partition. Ultimately this calls 3746 * meta_create_sp() which does the real work of allocating space 3747 * for the new soft partition. 3748 */ 3749 int 3750 meta_init_sp( 3751 mdsetname_t **spp, 3752 int argc, 3753 char *argv[], 3754 mdcmdopts_t options, 3755 md_error_t *ep 3756 ) 3757 { 3758 char *compname = NULL; 3759 mdname_t *spcompnp = NULL; /* name of component volume */ 3760 char *devname = argv[0]; /* unit name */ 3761 mdname_t *np = NULL; /* name of soft partition */ 3762 md_sp_t *msp = NULL; 3763 int c; 3764 int old_optind; 3765 sp_ext_length_t len = 0LL; 3766 int rval = -1; 3767 uint_t seq; 3768 int oflag; 3769 int failed; 3770 mddrivename_t *dnp = NULL; 3771 sp_ext_length_t alignment = 0LL; 3772 sp_ext_node_t *extlist = NULL; 3773 3774 assert(argc > 0); 3775 3776 /* expect sp name, -p, optional -e, compname, and size parameters */ 3777 /* grab soft partition name */ 3778 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3779 goto out; 3780 3781 /* see if it exists already */ 3782 if (metagetmiscname(np, ep) != NULL) { 3783 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3784 meta_getminor(np->dev), devname); 3785 goto out; 3786 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3787 goto out; 3788 } else { 3789 mdclrerror(ep); 3790 } 3791 --argc, ++argv; 3792 3793 if (argc == 0) 3794 goto syntax; 3795 3796 /* grab -p */ 3797 if (strcmp(argv[0], "-p") != 0) 3798 goto syntax; 3799 --argc, ++argv; 3800 3801 if (argc == 0) 3802 goto syntax; 3803 3804 /* see if -e is there */ 3805 if (strcmp(argv[0], "-e") == 0) { 3806 /* use the whole disk */ 3807 options |= MDCMD_USE_WHOLE_DISK; 3808 --argc, ++argv; 3809 } 3810 3811 if (argc == 0) 3812 goto syntax; 3813 3814 /* get component name */ 3815 compname = Strdup(argv[0]); 3816 3817 if (options & MDCMD_USE_WHOLE_DISK) { 3818 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3819 goto out; 3820 } 3821 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3822 goto out; 3823 } 3824 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3825 goto out; 3826 } 3827 assert(*spp != NULL); 3828 3829 if (!(options & MDCMD_NOLOCK)) { 3830 /* grab set lock */ 3831 if (meta_lock(*spp, TRUE, ep)) 3832 goto out; 3833 3834 if (meta_check_ownership(*spp, ep) != 0) 3835 goto out; 3836 } 3837 3838 /* allocate the soft partition */ 3839 msp = Zalloc(sizeof (*msp)); 3840 3841 /* setup common */ 3842 msp->common.namep = np; 3843 msp->common.type = MD_METASP; 3844 3845 compname = spcompnp->cname; 3846 3847 assert(spcompnp->rname != NULL); 3848 --argc, ++argv; 3849 3850 if (argc == 0) { 3851 goto syntax; 3852 } 3853 3854 if (*argv[0] == '-') { 3855 /* 3856 * parse any other command line options, this includes 3857 * the recovery options -o and -b. The special thing 3858 * with these options is that the len needs to be 3859 * kept track of otherwise when the geometry of the 3860 * "device" is built it will create an invalid geometry 3861 */ 3862 old_optind = optind = 0; 3863 opterr = 0; 3864 oflag = 0; 3865 seq = 0; 3866 failed = 0; 3867 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3868 sp_ext_offset_t offset; 3869 sp_ext_length_t length; 3870 longlong_t tmp_size; 3871 3872 switch (c) { 3873 case 'A': /* data alignment */ 3874 if (meta_sp_parsesizestring(optarg, 3875 &alignment) == -1) { 3876 failed = 1; 3877 } 3878 break; 3879 case 'o': /* offset in the partition */ 3880 if (oflag == 1) { 3881 failed = 1; 3882 } else { 3883 tmp_size = atoll(optarg); 3884 if (tmp_size <= 0) { 3885 failed = 1; 3886 } else { 3887 oflag = 1; 3888 options |= MDCMD_DIRECT; 3889 3890 offset = tmp_size; 3891 } 3892 } 3893 3894 break; 3895 case 'b': /* number of blocks */ 3896 if (oflag == 0) { 3897 failed = 1; 3898 } else { 3899 tmp_size = atoll(optarg); 3900 if (tmp_size <= 0) { 3901 failed = 1; 3902 } else { 3903 oflag = 0; 3904 3905 length = tmp_size; 3906 3907 /* we have a pair of values */ 3908 meta_sp_list_insert(*spp, np, 3909 &extlist, offset, 3910 length, EXTTYP_ALLOC, 3911 seq++, EXTFLG_UPDATE, 3912 meta_sp_cmp_by_offset); 3913 len += length; 3914 } 3915 } 3916 3917 break; 3918 default: 3919 argc -= old_optind; 3920 argv += old_optind; 3921 goto options; 3922 } 3923 3924 if (failed) { 3925 argc -= old_optind; 3926 argv += old_optind; 3927 goto syntax; 3928 } 3929 3930 old_optind = optind; 3931 } 3932 argc -= optind; 3933 argv += optind; 3934 3935 /* 3936 * Must have matching pairs of -o and -b flags 3937 */ 3938 if (oflag != 0) 3939 goto syntax; 3940 3941 /* 3942 * Can't specify both layout (indicated indirectly by 3943 * len being set by thye -o/-b cases above) AND 3944 * alignment 3945 */ 3946 if ((len > 0LL) && (alignment > 0LL)) 3947 goto syntax; 3948 3949 /* 3950 * sanity check the allocation list 3951 */ 3952 if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3953 goto syntax; 3954 } 3955 3956 if (len == 0LL) { 3957 if (argc == 0) 3958 goto syntax; 3959 if (meta_sp_parsesize(argv[0], &len) == -1) 3960 goto syntax; 3961 --argc, ++argv; 3962 } 3963 3964 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3965 msp->ext.ext_val->len = len; 3966 msp->compnamep = spcompnp; 3967 3968 /* we should be at the end */ 3969 if (argc != 0) 3970 goto syntax; 3971 3972 /* create soft partition */ 3973 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3974 goto out; 3975 rval = 0; 3976 3977 /* let em know */ 3978 if (options & MDCMD_PRINT) { 3979 (void) printf(dgettext(TEXT_DOMAIN, 3980 "%s: Soft Partition is setup\n"), 3981 devname); 3982 (void) fflush(stdout); 3983 } 3984 goto out; 3985 3986 syntax: 3987 /* syntax error */ 3988 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3989 goto out; 3990 3991 options: 3992 /* options error */ 3993 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3994 goto out; 3995 3996 out: 3997 if (msp != NULL) { 3998 if (msp->ext.ext_val != NULL) { 3999 Free(msp->ext.ext_val); 4000 } 4001 Free(msp); 4002 } 4003 4004 return (rval); 4005 } 4006 4007 /* 4008 * FUNCTION: meta_free_sp() 4009 * INPUT: msp - the soft partition unit to free 4010 * OUTPUT: none 4011 * RETURNS: void 4012 * PURPOSE: provides an interface from the rest of libmeta for freeing a 4013 * soft partition unit 4014 */ 4015 void 4016 meta_free_sp(md_sp_t *msp) 4017 { 4018 Free(msp); 4019 } 4020 4021 /* 4022 * FUNCTION: meta_sp_issp() 4023 * INPUT: sp - the set name to check 4024 * np - the name to check 4025 * OUTPUT: ep - return error pointer 4026 * RETURNS: int - 0 means sp,np is a soft partition 4027 * 1 means sp,np is not a soft partition 4028 * PURPOSE: determines whether the given device is a soft partition 4029 * device. This is called by other metadevice check routines. 4030 */ 4031 int 4032 meta_sp_issp( 4033 mdsetname_t *sp, 4034 mdname_t *np, 4035 md_error_t *ep 4036 ) 4037 { 4038 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 4039 return (1); 4040 4041 return (0); 4042 } 4043 4044 /* 4045 * FUNCTION: meta_check_sp() 4046 * INPUT: sp - the set name to check 4047 * msp - the unit structure to check 4048 * options - creation options 4049 * OUTPUT: repart_options - options to be passed to 4050 * meta_repartition_drive() 4051 * ep - return error pointer 4052 * RETURNS: int - 0 ok to create on this component 4053 * -1 error or not ok to create on this component 4054 * PURPOSE: Checks to determine whether the rules for creation of 4055 * soft partitions allow creation of a soft partition on 4056 * the device described by the mdname_t structure referred 4057 * to by msp->compnamep. 4058 * 4059 * NOTE: Does NOT check to determine whether the extents 4060 * described in the md_sp_t structure referred to by 4061 * msp will fit on the device described by the mdname_t 4062 * structure located at msp->compnamep. 4063 */ 4064 static int 4065 meta_check_sp( 4066 mdsetname_t *sp, 4067 md_sp_t *msp, 4068 mdcmdopts_t options, 4069 int *repart_options, 4070 md_error_t *ep 4071 ) 4072 { 4073 md_common_t *mdp; 4074 mdname_t *compnp = msp->compnamep; 4075 uint_t slice; 4076 mddrivename_t *dnp; 4077 mdname_t *slicenp; 4078 mdvtoc_t *vtocp; 4079 4080 /* make sure it is in the set */ 4081 if (meta_check_inset(sp, compnp, ep) != 0) 4082 return (-1); 4083 4084 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4085 uint_t rep_slice; 4086 4087 /* 4088 * check to make sure we can partition this drive. 4089 * we cannot continue if any of the following are 4090 * true: 4091 * The drive is a metadevice. 4092 * The drive contains a mounted slice. 4093 * The drive contains a slice being swapped to. 4094 * The drive contains slices which are part of other 4095 * metadevices. 4096 * The drive contains a metadb. 4097 */ 4098 if (metaismeta(compnp)) 4099 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4100 compnp->cname)); 4101 4102 assert(compnp->drivenamep != NULL); 4103 4104 /* 4105 * ensure that we have slice 0 since the disk will be 4106 * repartitioned in the USE_WHOLE_DISK case. this check 4107 * is redundant unless the user incorrectly specifies a 4108 * a fully qualified drive AND slice name (i.e., 4109 * /dev/dsk/cXtXdXsX), which will be incorrectly 4110 * recognized as a drive name by the metaname code. 4111 */ 4112 4113 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4114 return (-1); 4115 if (slice != MD_SLICE0) 4116 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4117 4118 dnp = compnp->drivenamep; 4119 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4120 return (-1); 4121 4122 for (slice = 0; slice < vtocp->nparts; slice++) { 4123 4124 /* only check if the slice really exists */ 4125 if (vtocp->parts[slice].size == 0) 4126 continue; 4127 4128 slicenp = metaslicename(dnp, slice, ep); 4129 if (slicenp == NULL) 4130 return (-1); 4131 4132 /* check to ensure that it is not already in use */ 4133 if (meta_check_inuse(sp, 4134 slicenp, MDCHK_INUSE, ep) != 0) { 4135 return (-1); 4136 } 4137 4138 /* 4139 * Up to this point, tests are applied to all 4140 * slices uniformly. 4141 */ 4142 4143 if (slice == rep_slice) { 4144 /* 4145 * Tests inside the body of this 4146 * conditional are applied only to 4147 * slice seven. 4148 */ 4149 if (meta_check_inmeta(sp, slicenp, 4150 options | MDCHK_ALLOW_MDDB | 4151 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4152 return (-1); 4153 4154 /* 4155 * For slice seven, a metadb is NOT an 4156 * automatic failure. It merely means 4157 * that we're not allowed to muck 4158 * about with the partitioning of that 4159 * slice. We indicate this by masking 4160 * in the MD_REPART_LEAVE_REP flag. 4161 */ 4162 if (metahasmddb(sp, slicenp, ep)) { 4163 assert(repart_options != 4164 NULL); 4165 *repart_options |= 4166 MD_REPART_LEAVE_REP; 4167 } 4168 4169 /* 4170 * Skip the remaining tests for slice 4171 * seven 4172 */ 4173 continue; 4174 } 4175 4176 /* 4177 * Tests below this point will be applied to 4178 * all slices EXCEPT for the replica slice. 4179 */ 4180 4181 4182 /* check if component is in a metadevice */ 4183 if (meta_check_inmeta(sp, slicenp, options, 0, 4184 -1, ep) != 0) 4185 return (-1); 4186 4187 /* check to see if component has a metadb */ 4188 if (metahasmddb(sp, slicenp, ep)) 4189 return (mddeverror(ep, MDE_HAS_MDDB, 4190 slicenp->dev, slicenp->cname)); 4191 } 4192 /* 4193 * This should be all of the testing necessary when 4194 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4195 * meta_check_sp() is oriented towards component 4196 * arguments instead of disks. 4197 */ 4198 goto meta_check_sp_ok; 4199 4200 } 4201 4202 /* check to ensure that it is not already in use */ 4203 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4204 return (-1); 4205 } 4206 4207 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4208 4209 /* 4210 * The component can have one or more soft partitions on it 4211 * already, but can't be part of any other type of metadevice, 4212 * so if it is used for a metadevice, but the metadevice 4213 * isn't a soft partition, return failure. 4214 */ 4215 4216 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4217 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4218 return (-1); 4219 } 4220 } else { /* handle metadevices */ 4221 /* get underlying unit & check capabilities */ 4222 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4223 return (-1); 4224 4225 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4226 (! (mdp->capabilities & MD_CAN_SP))) 4227 return (mdmderror(ep, MDE_INVAL_UNIT, 4228 meta_getminor(compnp->dev), compnp->cname)); 4229 } 4230 4231 meta_check_sp_ok: 4232 mdclrerror(ep); 4233 return (0); 4234 } 4235 4236 /* 4237 * FUNCTION: meta_create_sp() 4238 * INPUT: sp - the set name to create in 4239 * msp - the unit structure to create 4240 * oblist - an optional list of requested extents (-o/-b options) 4241 * options - creation options 4242 * alignment - data alignment 4243 * OUTPUT: ep - return error pointer 4244 * RETURNS: int - 0 success, -1 error 4245 * PURPOSE: does most of the work for creating a soft partition. If 4246 * metainit -p -e was used, first partition the drive. Then 4247 * create an extent list based on the existing soft partitions 4248 * and assume all space not used by them is free. Storage for 4249 * the new soft partition is allocated from the free extents 4250 * based on the length specified on the command line or the 4251 * oblist passed in. The unit structure is then committed and 4252 * the watermarks are updated. Finally, the status is changed to 4253 * Okay and the process is complete. 4254 */ 4255 static int 4256 meta_create_sp( 4257 mdsetname_t *sp, 4258 md_sp_t *msp, 4259 sp_ext_node_t *oblist, 4260 mdcmdopts_t options, 4261 sp_ext_length_t alignment, 4262 md_error_t *ep 4263 ) 4264 { 4265 mdname_t *np = msp->common.namep; 4266 mdname_t *compnp = msp->compnamep; 4267 mp_unit_t *mp = NULL; 4268 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4269 md_set_params_t set_params; 4270 int rval = -1; 4271 diskaddr_t comp_size; 4272 diskaddr_t sp_start; 4273 sp_ext_node_t *extlist = NULL; 4274 int numexts = 0; /* number of extents */ 4275 int count = 0; 4276 int committed = 0; 4277 int repart_options = MD_REPART_FORCE; 4278 int create_flag = MD_CRO_32BIT; 4279 4280 md_set_desc *sd; 4281 mm_unit_t *mm; 4282 md_set_mmown_params_t *ownpar = NULL; 4283 int comp_is_mirror = 0; 4284 4285 /* validate soft partition */ 4286 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4287 return (-1); 4288 4289 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4290 if ((options & MDCMD_DOIT) != 0) { 4291 if (meta_repartition_drive(sp, 4292 compnp->drivenamep, 4293 repart_options, 4294 NULL, /* Don't return the VTOC */ 4295 ep) != 0) 4296 4297 return (-1); 4298 } else { 4299 /* 4300 * If -n and -e are both specified, it doesn't make 4301 * sense to continue without actually partitioning 4302 * the drive. 4303 */ 4304 return (0); 4305 } 4306 } 4307 4308 /* populate the start_blk field of the component name */ 4309 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4310 MD_DISKADDR_ERROR) { 4311 rval = -1; 4312 goto out; 4313 } 4314 4315 if (options & MDCMD_DOIT) { 4316 /* store name in namespace */ 4317 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4318 rval = -1; 4319 goto out; 4320 } 4321 } 4322 4323 /* 4324 * Get a list of the soft partitions that currently reside on 4325 * the component. We should ALWAYS force reload the cache, 4326 * because if this is a single creation, there will not BE a 4327 * cached list, and if we're using the md.tab, we must rebuild 4328 * the list because it won't contain the previous (if any) 4329 * soft partition. 4330 */ 4331 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4332 if (count < 0) { 4333 /* error occured */ 4334 rval = -1; 4335 goto out; 4336 } 4337 4338 /* 4339 * get the size of the underlying device. if the size is smaller 4340 * than or equal to the watermark size, we know there isn't 4341 * enough space. 4342 */ 4343 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4344 rval = -1; 4345 goto out; 4346 } else if (comp_size <= MD_SP_WMSIZE) { 4347 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4348 rval = -1; 4349 goto out; 4350 } 4351 /* 4352 * seed extlist with reserved space at the beginning of the volume and 4353 * enough space for the end watermark. The end watermark always gets 4354 * updated, but if the underlying device changes size it may not be 4355 * pointed to until the extent before it is updated. Since the 4356 * end of the reserved space is where the first watermark starts, 4357 * the reserved extent should never be marked for updating. 4358 */ 4359 4360 meta_sp_list_insert(NULL, NULL, &extlist, 4361 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4362 meta_sp_list_insert(NULL, NULL, &extlist, 4363 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4364 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4365 4366 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4367 rval = -1; 4368 goto out; 4369 } 4370 4371 metafreenamelist(spnlp); 4372 4373 if (getenv(META_SP_DEBUG)) { 4374 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4375 meta_sp_list_dump(extlist); 4376 } 4377 4378 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4379 4380 /* get extent list from -o/-b options or from free space */ 4381 if (options & MDCMD_DIRECT) { 4382 if (getenv(META_SP_DEBUG)) { 4383 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4384 meta_sp_list_dump(oblist); 4385 } 4386 4387 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4388 if (numexts == -1) { 4389 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4390 rval = -1; 4391 goto out; 4392 } 4393 } else { 4394 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4395 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4396 meta_sp_get_default_alignment(sp, compnp, ep)); 4397 if (numexts == -1) { 4398 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4399 rval = -1; 4400 goto out; 4401 } 4402 } 4403 4404 assert(extlist != NULL); 4405 4406 /* create soft partition */ 4407 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4408 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4409 4410 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4411 4412 /* if we're not doing anything (metainit -n), return success */ 4413 if (! (options & MDCMD_DOIT)) { 4414 rval = 0; /* success */ 4415 goto out; 4416 } 4417 4418 (void) memset(&set_params, 0, sizeof (set_params)); 4419 4420 if (create_flag == MD_CRO_64BIT) { 4421 mp->c.un_revision |= MD_64BIT_META_DEV; 4422 set_params.options = MD_CRO_64BIT; 4423 } else { 4424 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4425 set_params.options = MD_CRO_32BIT; 4426 } 4427 4428 if (getenv(META_SP_DEBUG)) { 4429 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4430 meta_sp_printunit(mp); 4431 } 4432 4433 /* 4434 * Check to see if we're trying to create a partition on a mirror. If so 4435 * we may have to enforce an ownership change before writing the 4436 * watermark out. 4437 */ 4438 if (metaismeta(compnp)) { 4439 char *miscname; 4440 4441 miscname = metagetmiscname(compnp, ep); 4442 if (miscname != NULL) 4443 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4444 else 4445 comp_is_mirror = 0; 4446 } else { 4447 comp_is_mirror = 0; 4448 } 4449 4450 /* 4451 * For a multi-node environment we have to ensure that the master 4452 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4453 * If the master does not own the device we will deadlock as the 4454 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4455 * ownership change that will block as the MD_IOCSET is still in 4456 * progress. To close this window we force an owner change to occur 4457 * before issuing the MD_IOCSET. We cannot simply open the device and 4458 * write to it as this will only work for the first soft-partition 4459 * creation. 4460 */ 4461 4462 if (comp_is_mirror && !metaislocalset(sp)) { 4463 4464 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4465 rval = -1; 4466 goto out; 4467 } 4468 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4469 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 4470 if (mm == NULL) { 4471 rval = -1; 4472 goto out; 4473 } else { 4474 rval = meta_mn_change_owner(&ownpar, sp->setno, 4475 meta_getminor(compnp->dev), 4476 sd->sd_mn_mynode->nd_nodeid, 4477 MD_MN_MM_PREVENT_CHANGE | 4478 MD_MN_MM_SPAWN_THREAD); 4479 if (rval == -1) 4480 goto out; 4481 } 4482 } 4483 } 4484 4485 set_params.mnum = MD_SID(mp); 4486 set_params.size = mp->c.un_size; 4487 set_params.mdp = (uintptr_t)mp; 4488 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4489 4490 /* first phase of commit. */ 4491 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4492 np->cname) != 0) { 4493 (void) mdstealerror(ep, &set_params.mde); 4494 rval = -1; 4495 goto out; 4496 } 4497 4498 /* we've successfully committed the record */ 4499 committed = 1; 4500 4501 /* write watermarks */ 4502 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4503 rval = -1; 4504 goto out; 4505 } 4506 4507 /* 4508 * Allow mirror ownership to change. If we don't succeed in this 4509 * ioctl it isn't fatal, but the cluster will probably hang fairly 4510 * soon as the mirror owner won't change. However, we have 4511 * successfully written the watermarks out to the device so the 4512 * softpart creation has succeeded 4513 */ 4514 if (ownpar) { 4515 (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum, 4516 ownpar->d.owner, 4517 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 4518 } 4519 4520 /* second phase of commit, set status to MD_SP_OK */ 4521 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4522 rval = -1; 4523 goto out; 4524 } 4525 rval = 0; 4526 out: 4527 Free(mp); 4528 if (ownpar) 4529 Free(ownpar); 4530 4531 if (extlist != NULL) 4532 meta_sp_list_free(&extlist); 4533 4534 if (rval != 0 && keynlp != NULL && committed != 1) 4535 (void) del_key_names(sp, keynlp, NULL); 4536 4537 metafreenamelist(keynlp); 4538 4539 return (rval); 4540 } 4541 4542 /* 4543 * ************************************************************************** 4544 * Reset (metaclear) Functions * 4545 * ************************************************************************** 4546 */ 4547 4548 /* 4549 * FUNCTION: meta_sp_reset_common() 4550 * INPUT: sp - the set name of the device to reset 4551 * np - the name of the device to reset 4552 * msp - the unit structure to reset 4553 * options - metaclear options 4554 * OUTPUT: ep - return error pointer 4555 * RETURNS: int - 0 success, -1 error 4556 * PURPOSE: "resets", or more accurately deletes, the soft partition 4557 * specified. First the state is set to "deleting" and then the 4558 * watermarks are all cleared out. Once the watermarks have been 4559 * updated, the unit structure is deleted from the metadb. 4560 */ 4561 static int 4562 meta_sp_reset_common( 4563 mdsetname_t *sp, 4564 mdname_t *np, 4565 md_sp_t *msp, 4566 md_sp_reset_t reset_params, 4567 mdcmdopts_t options, 4568 md_error_t *ep 4569 ) 4570 { 4571 char *miscname; 4572 int rval = -1; 4573 int is_open = 0; 4574 4575 /* make sure that nobody owns us */ 4576 if (MD_HAS_PARENT(msp->common.parent)) 4577 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4578 np->cname)); 4579 4580 /* make sure that the soft partition isn't open */ 4581 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4582 return (-1); 4583 else if (is_open) 4584 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4585 np->cname)); 4586 4587 /* get miscname */ 4588 if ((miscname = metagetmiscname(np, ep)) == NULL) 4589 return (-1); 4590 4591 /* fill in reset params */ 4592 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4593 reset_params.mnum = meta_getminor(np->dev); 4594 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4595 4596 /* 4597 * clear soft partition - phase one. 4598 * place the soft partition into the "delete pending" state. 4599 */ 4600 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4601 return (-1); 4602 4603 /* 4604 * Now clear the watermarks. If the force flag is specified, 4605 * ignore any errors writing the watermarks and delete the unit 4606 * structure anyway. An error may leave the on-disk format in a 4607 * corrupt state. If force is not specified and we fail here, 4608 * the soft partition will remain in the "delete pending" state. 4609 */ 4610 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4611 ((options & MDCMD_FORCE) == 0)) 4612 goto out; 4613 4614 /* 4615 * clear soft partition - phase two. 4616 * the driver removes the soft partition from the metadb and 4617 * zeros out incore version. 4618 */ 4619 if (metaioctl(MD_IOCRESET, &reset_params, 4620 &reset_params.mde, np->cname) != 0) { 4621 (void) mdstealerror(ep, &reset_params.mde); 4622 goto out; 4623 } 4624 rval = 0; /* success */ 4625 4626 if (options & MDCMD_PRINT) { 4627 (void) printf(dgettext(TEXT_DOMAIN, 4628 "%s: Soft Partition is cleared\n"), 4629 np->cname); 4630 (void) fflush(stdout); 4631 } 4632 4633 /* 4634 * if told to recurse and on a metadevice, then attempt to 4635 * clear the subdevices. Indicate failure if the clear fails. 4636 */ 4637 if ((options & MDCMD_RECURSE) && 4638 (metaismeta(msp->compnamep)) && 4639 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4640 rval = -1; 4641 4642 out: 4643 meta_invalidate_name(np); 4644 return (rval); 4645 } 4646 4647 /* 4648 * FUNCTION: meta_sp_reset() 4649 * INPUT: sp - the set name of the device to reset 4650 * np - the name of the device to reset 4651 * options - metaclear options 4652 * OUTPUT: ep - return error pointer 4653 * RETURNS: int - 0 success, -1 error 4654 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4655 * soft partition. If np is NULL, then soft partitions are 4656 * all deleted at the current level and then recursively deleted. 4657 * Otherwise, if a name is specified either directly or as a 4658 * result of a recursive operation, it deletes only that name. 4659 * Since something sitting under a soft partition may be parented 4660 * to it, we have to reparent that other device to another soft 4661 * partition on the same component if we're deleting the one it's 4662 * parented to. 4663 */ 4664 int 4665 meta_sp_reset( 4666 mdsetname_t *sp, 4667 mdname_t *np, 4668 mdcmdopts_t options, 4669 md_error_t *ep 4670 ) 4671 { 4672 md_sp_t *msp; 4673 int rval = -1; 4674 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4675 md_sp_reset_t reset_params; 4676 int num_sp; 4677 4678 assert(sp != NULL); 4679 4680 /* reset/delete all soft paritions */ 4681 if (np == NULL) { 4682 /* 4683 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4684 * is incorrect for soft partitions. We want to clear 4685 * all soft partitions at a particular level in the 4686 * metadevice stack before moving to the next level. 4687 * Thus, we clear MDCMD_RECURSE from the options. 4688 */ 4689 options &= ~MDCMD_RECURSE; 4690 4691 /* for each soft partition */ 4692 rval = 0; 4693 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4694 rval = -1; 4695 4696 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4697 np = nlp->namep; 4698 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4699 rval = -1; 4700 break; 4701 } 4702 /* 4703 * meta_reset_all calls us twice to get soft 4704 * partitions at the top and bottom of the stack. 4705 * thus, if we have a parent, we'll get deleted 4706 * on the next call. 4707 */ 4708 if (MD_HAS_PARENT(msp->common.parent)) 4709 continue; 4710 /* 4711 * If this is a multi-node set, we send a series 4712 * of individual metaclear commands. 4713 */ 4714 if (meta_is_mn_set(sp, ep)) { 4715 if (meta_mn_send_metaclear_command(sp, 4716 np->cname, options, 0, ep) != 0) { 4717 rval = -1; 4718 break; 4719 } 4720 } else { 4721 if (meta_sp_reset(sp, np, options, ep) != 0) { 4722 rval = -1; 4723 break; 4724 } 4725 } 4726 } 4727 /* cleanup return status */ 4728 metafreenamelist(spnlp); 4729 return (rval); 4730 } 4731 4732 /* check the name */ 4733 if (metachkmeta(np, ep) != 0) 4734 return (-1); 4735 4736 /* get the unit structure */ 4737 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4738 return (-1); 4739 4740 /* clear out reset parameters */ 4741 (void) memset(&reset_params, 0, sizeof (reset_params)); 4742 4743 /* if our child is a metadevice, we need to deparent/reparent it */ 4744 if (metaismeta(msp->compnamep)) { 4745 /* get sp's on this component */ 4746 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4747 &spnlp, 1, ep)) <= 0) 4748 /* no sp's on this device. error! */ 4749 return (-1); 4750 else if (num_sp == 1) 4751 /* last sp on this device, so we deparent */ 4752 reset_params.new_parent = MD_NO_PARENT; 4753 else { 4754 /* have to reparent this metadevice */ 4755 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4756 if (meta_getminor(nlp->namep->dev) == 4757 meta_getminor(np->dev)) 4758 continue; 4759 /* 4760 * this isn't the softpart we are deleting, 4761 * so use this device as the new parent. 4762 */ 4763 reset_params.new_parent = 4764 meta_getminor(nlp->namep->dev); 4765 break; 4766 } 4767 } 4768 metafreenamelist(spnlp); 4769 } 4770 4771 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4772 return (-1); 4773 4774 return (0); 4775 } 4776 4777 /* 4778 * FUNCTION: meta_sp_reset_component() 4779 * INPUT: sp - the set name of the device to reset 4780 * name - the string name of the device to reset 4781 * options - metaclear options 4782 * OUTPUT: ep - return error pointer 4783 * RETURNS: int - 0 success, -1 error 4784 * PURPOSE: provides the ability to delete all soft partitions on a 4785 * specified device (metaclear -p). It first gets all of the 4786 * soft partitions on the component and then deletes each one 4787 * individually. 4788 */ 4789 int 4790 meta_sp_reset_component( 4791 mdsetname_t *sp, 4792 char *name, 4793 mdcmdopts_t options, 4794 md_error_t *ep 4795 ) 4796 { 4797 mdname_t *compnp, *np; 4798 mdnamelist_t *spnlp = NULL; 4799 mdnamelist_t *nlp = NULL; 4800 md_sp_t *msp; 4801 int count; 4802 md_sp_reset_t reset_params; 4803 4804 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4805 return (-1); 4806 4807 /* If we're starting out with no soft partitions, it's an error */ 4808 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4809 if (count == 0) 4810 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4811 else if (count < 0) 4812 return (-1); 4813 4814 /* 4815 * clear all soft partitions on this component. 4816 * NOTE: we reparent underlying metadevices as we go so that 4817 * things stay sane. Also, if we encounter an error, we stop 4818 * and go no further in case recovery might be needed. 4819 */ 4820 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4821 /* clear out reset parameters */ 4822 (void) memset(&reset_params, 0, sizeof (reset_params)); 4823 4824 /* check the name */ 4825 np = nlp->namep; 4826 4827 if (metachkmeta(np, ep) != 0) { 4828 metafreenamelist(spnlp); 4829 return (-1); 4830 } 4831 4832 /* get the unit structure */ 4833 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4834 metafreenamelist(spnlp); 4835 return (-1); 4836 } 4837 4838 /* have to deparent/reparent metadevices */ 4839 if (metaismeta(compnp)) { 4840 if (nlp->next == NULL) 4841 reset_params.new_parent = MD_NO_PARENT; 4842 else 4843 reset_params.new_parent = 4844 meta_getminor(spnlp->next->namep->dev); 4845 } 4846 4847 /* clear soft partition */ 4848 if (meta_sp_reset_common(sp, np, msp, reset_params, 4849 options, ep) < 0) { 4850 metafreenamelist(spnlp); 4851 return (-1); 4852 } 4853 } 4854 metafreenamelist(spnlp); 4855 return (0); 4856 } 4857 4858 /* 4859 * ************************************************************************** 4860 * Grow (metattach) Functions * 4861 * ************************************************************************** 4862 */ 4863 4864 /* 4865 * FUNCTION: meta_sp_attach() 4866 * INPUT: sp - the set name of the device to attach to 4867 * np - the name of the device to attach to 4868 * addsize - the unparsed string holding the amount of space to add 4869 * options - metattach options 4870 * alignment - data alignment 4871 * OUTPUT: ep - return error pointer 4872 * RETURNS: int - 0 success, -1 error 4873 * PURPOSE: grows a soft partition by reading in the existing unit 4874 * structure and setting its state to Growing, allocating more 4875 * space (similar to meta_create_sp()), updating the watermarks, 4876 * and then writing out the new unit structure in the Okay state. 4877 */ 4878 int 4879 meta_sp_attach( 4880 mdsetname_t *sp, 4881 mdname_t *np, 4882 char *addsize, 4883 mdcmdopts_t options, 4884 sp_ext_length_t alignment, 4885 md_error_t *ep 4886 ) 4887 { 4888 md_grow_params_t grow_params; 4889 sp_ext_length_t grow_len; /* amount to grow */ 4890 mp_unit_t *mp, *new_un; 4891 mdname_t *compnp = NULL; 4892 4893 sp_ext_node_t *extlist = NULL; 4894 int numexts; 4895 mdnamelist_t *spnlp = NULL; 4896 int count; 4897 md_sp_t *msp; 4898 daddr_t start_block; 4899 4900 /* should have the same set */ 4901 assert(sp != NULL); 4902 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4903 4904 /* check name */ 4905 if (metachkmeta(np, ep) != 0) 4906 return (-1); 4907 4908 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4909 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4910 } 4911 4912 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4913 return (-1); 4914 4915 /* make sure we don't have a parent */ 4916 if (MD_HAS_PARENT(mp->c.un_parent)) { 4917 Free(mp); 4918 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4919 } 4920 4921 if (getenv(META_SP_DEBUG)) { 4922 meta_sp_debug("meta_sp_attach: Unit structure before new " 4923 "space:\n"); 4924 meta_sp_printunit(mp); 4925 } 4926 4927 /* 4928 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4929 * If this was not the case we would suffer the following 4930 * assertion failure: 4931 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4932 * file meta_check.x, line 315 4933 * I guess this is because we have not "seen" this drive before 4934 * and hence hit the failure - this is of course the attach routine 4935 */ 4936 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4937 Free(mp); 4938 return (-1); 4939 } 4940 4941 /* metakeyname does not fill in the key. */ 4942 compnp->key = mp->un_key; 4943 4944 /* work out the space on the component that we are dealing with */ 4945 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4946 4947 /* 4948 * see if the component has been soft partitioned yet, or if an 4949 * error occurred. 4950 */ 4951 if (count == 0) { 4952 Free(mp); 4953 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4954 } else if (count < 0) { 4955 Free(mp); 4956 return (-1); 4957 } 4958 4959 /* 4960 * seed extlist with reserved space at the beginning of the volume and 4961 * enough space for the end watermark. The end watermark always gets 4962 * updated, but if the underlying device changes size it may not be 4963 * pointed to until the extent before it is updated. Since the 4964 * end of the reserved space is where the first watermark starts, 4965 * the reserved extent should never be marked for updating. 4966 */ 4967 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4968 MD_DISKADDR_ERROR) { 4969 Free(mp); 4970 return (-1); 4971 } 4972 4973 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4974 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4975 meta_sp_list_insert(NULL, NULL, &extlist, 4976 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4977 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4978 4979 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4980 Free(mp); 4981 return (-1); 4982 } 4983 4984 metafreenamelist(spnlp); 4985 4986 if (getenv(META_SP_DEBUG)) { 4987 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4988 meta_sp_list_dump(extlist); 4989 } 4990 4991 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4992 4993 assert(mp->un_numexts >= 1); 4994 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4995 mp->un_ext[mp->un_numexts - 1].un_poff, 4996 (alignment > 0) ? alignment : 4997 meta_sp_get_default_alignment(sp, compnp, ep)); 4998 4999 if (numexts == -1) { 5000 Free(mp); 5001 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 5002 } 5003 5004 /* allocate new unit structure and copy in old unit */ 5005 if ((new_un = meta_sp_updateunit(np, mp, extlist, 5006 grow_len, numexts, ep)) == NULL) { 5007 Free(mp); 5008 return (-1); 5009 } 5010 Free(mp); 5011 5012 /* If running in dryrun mode (-n option), we're done here */ 5013 if ((options & MDCMD_DOIT) == 0) { 5014 if (options & MDCMD_PRINT) { 5015 (void) printf(dgettext(TEXT_DOMAIN, 5016 "%s: Soft Partition would grow\n"), 5017 np->cname); 5018 (void) fflush(stdout); 5019 } 5020 return (0); 5021 } 5022 5023 if (getenv(META_SP_DEBUG)) { 5024 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 5025 meta_sp_printunit(new_un); 5026 } 5027 5028 assert(new_un != NULL); 5029 5030 (void) memset(&grow_params, 0, sizeof (grow_params)); 5031 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 5032 grow_params.options = MD_CRO_64BIT; 5033 new_un->c.un_revision |= MD_64BIT_META_DEV; 5034 } else { 5035 grow_params.options = MD_CRO_32BIT; 5036 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 5037 } 5038 grow_params.mnum = MD_SID(new_un); 5039 grow_params.size = new_un->c.un_size; 5040 grow_params.mdp = (uintptr_t)new_un; 5041 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5042 5043 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5044 np->cname) != 0) { 5045 (void) mdstealerror(ep, &grow_params.mde); 5046 return (-1); 5047 } 5048 5049 /* update all watermarks */ 5050 5051 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5052 return (-1); 5053 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5054 return (-1); 5055 5056 5057 /* second phase of commit, set status to MD_SP_OK */ 5058 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5059 return (-1); 5060 5061 meta_invalidate_name(np); 5062 5063 if (options & MDCMD_PRINT) { 5064 (void) printf(dgettext(TEXT_DOMAIN, 5065 "%s: Soft Partition has been grown\n"), 5066 np->cname); 5067 (void) fflush(stdout); 5068 } 5069 5070 return (0); 5071 } 5072 5073 /* 5074 * ************************************************************************** 5075 * Recovery (metarecover) Functions * 5076 * ************************************************************************** 5077 */ 5078 5079 /* 5080 * FUNCTION: meta_recover_sp() 5081 * INPUT: sp - the name of the set we are recovering on 5082 * compnp - name pointer for device we are recovering on 5083 * argc - argument count 5084 * argv - left over arguments not parsed by metarecover command 5085 * options - metarecover options 5086 * OUTPUT: ep - return error pointer 5087 * RETURNS: int - 0 - success, -1 - error 5088 * PURPOSE: parse soft partitioning-specific metarecover options and 5089 * dispatch to the appropriate function to handle recovery. 5090 */ 5091 int 5092 meta_recover_sp( 5093 mdsetname_t *sp, 5094 mdname_t *compnp, 5095 int argc, 5096 char *argv[], 5097 mdcmdopts_t options, 5098 md_error_t *ep 5099 ) 5100 { 5101 md_set_desc *sd; 5102 5103 if (argc > 1) { 5104 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5105 argc, argv); 5106 return (-1); 5107 } 5108 5109 /* 5110 * For a MN set, this operation must be performed on the master 5111 * as it is responsible for maintaining the watermarks 5112 */ 5113 if (!metaislocalset(sp)) { 5114 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5115 return (-1); 5116 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5117 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5118 sd->sd_mn_master_nodenm, NULL, NULL); 5119 return (-1); 5120 } 5121 } 5122 if (argc == 0) { 5123 /* 5124 * if no additional arguments are passed, metarecover should 5125 * validate both on-disk and metadb structures as well as 5126 * checking that both are consistent with each other 5127 */ 5128 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5129 return (-1); 5130 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5131 return (-1); 5132 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5133 return (-1); 5134 } else if (strcmp(argv[0], "-d") == 0) { 5135 /* 5136 * Ensure that there is no existing valid record for this 5137 * soft-partition. If there is we have nothing to do. 5138 */ 5139 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5140 return (-1); 5141 /* validate and recover from on-disk structures */ 5142 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5143 return (-1); 5144 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5145 return (-1); 5146 } else if (strcmp(argv[0], "-m") == 0) { 5147 /* validate and recover from metadb structures */ 5148 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5149 return (-1); 5150 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5151 return (-1); 5152 } else { 5153 /* syntax error */ 5154 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5155 argc, argv); 5156 return (-1); 5157 } 5158 5159 return (0); 5160 } 5161 5162 /* 5163 * FUNCTION: meta_sp_display_exthdr() 5164 * INPUT: none 5165 * OUTPUT: none 5166 * RETURNS: void 5167 * PURPOSE: print header line for sp_ext_node_t information. to be used 5168 * in conjunction with meta_sp_display_ext(). 5169 */ 5170 static void 5171 meta_sp_display_exthdr(void) 5172 { 5173 (void) printf("%20s %5s %7s %20s %20s\n", 5174 dgettext(TEXT_DOMAIN, "Name"), 5175 dgettext(TEXT_DOMAIN, "Seq#"), 5176 dgettext(TEXT_DOMAIN, "Type"), 5177 dgettext(TEXT_DOMAIN, "Offset"), 5178 dgettext(TEXT_DOMAIN, "Length")); 5179 } 5180 5181 5182 /* 5183 * FUNCTION: meta_sp_display_ext() 5184 * INPUT: ext - extent to display 5185 * OUTPUT: none 5186 * RETURNS: void 5187 * PURPOSE: print selected fields from sp_ext_node_t. 5188 */ 5189 static void 5190 meta_sp_display_ext(sp_ext_node_t *ext) 5191 { 5192 /* print extent information */ 5193 if (ext->ext_namep != NULL) 5194 (void) printf("%20s ", ext->ext_namep->cname); 5195 else 5196 (void) printf("%20s ", "NONE"); 5197 5198 (void) printf("%5u ", ext->ext_seq); 5199 5200 switch (ext->ext_type) { 5201 case EXTTYP_ALLOC: 5202 (void) printf("%7s ", "ALLOC"); 5203 break; 5204 case EXTTYP_FREE: 5205 (void) printf("%7s ", "FREE"); 5206 break; 5207 case EXTTYP_RESERVED: 5208 (void) printf("%7s ", "RESV"); 5209 break; 5210 case EXTTYP_END: 5211 (void) printf("%7s ", "END"); 5212 break; 5213 default: 5214 (void) printf("%7s ", "INVLD"); 5215 break; 5216 } 5217 5218 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5219 } 5220 5221 5222 /* 5223 * FUNCTION: meta_sp_checkseq() 5224 * INPUT: extlist - list of extents to be checked 5225 * OUTPUT: none 5226 * RETURNS: int - 0 - success, -1 - error 5227 * PURPOSE: check soft partition sequence numbers. this function assumes 5228 * that a list of extents representing 1 or more soft partitions 5229 * is passed in sorted in sequence number order. within a 5230 * single soft partition, there may not be any missing or 5231 * duplicate sequence numbers. 5232 */ 5233 static int 5234 meta_sp_checkseq(sp_ext_node_t *extlist) 5235 { 5236 sp_ext_node_t *ext; 5237 5238 assert(extlist != NULL); 5239 5240 for (ext = extlist; 5241 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5242 ext = ext->ext_next) { 5243 if (ext->ext_next->ext_namep != NULL && 5244 strcmp(ext->ext_next->ext_namep->cname, 5245 ext->ext_namep->cname) != 0) 5246 continue; 5247 5248 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5249 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5250 "%s: sequence numbers are " 5251 "incorrect: %d should be %d\n"), 5252 ext->ext_next->ext_namep->cname, 5253 ext->ext_next->ext_seq, ext->ext_seq + 1); 5254 return (-1); 5255 } 5256 } 5257 return (0); 5258 } 5259 5260 5261 /* 5262 * FUNCTION: meta_sp_resolve_name_conflict() 5263 * INPUT: sp - name of set we're are recovering in. 5264 * old_np - name pointer of soft partition we found on disk. 5265 * OUTPUT: new_np - name pointer for new soft partition name. 5266 * ep - error pointer returned. 5267 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5268 * PURPOSE: Check to see if the name of one of the soft partitions we found 5269 * on disk already exists in the metadb. If so, prompt for a new 5270 * name. In addition, we keep a static array of names that 5271 * will be recovered from this device since these names don't 5272 * exist in the configuration at this point but cannot be 5273 * recovered more than once. 5274 */ 5275 static int 5276 meta_sp_resolve_name_conflict( 5277 mdsetname_t *sp, 5278 mdname_t *old_np, 5279 mdname_t **new_np, 5280 md_error_t *ep 5281 ) 5282 { 5283 char yesno[255]; 5284 char *yes; 5285 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5286 int nunits; 5287 static int *used_names = NULL; 5288 5289 assert(old_np != NULL); 5290 5291 if (used_names == NULL) { 5292 if ((nunits = meta_get_nunits(ep)) < 0) 5293 return (-1); 5294 used_names = Zalloc(nunits * sizeof (int)); 5295 } 5296 5297 /* see if it exists already */ 5298 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5299 metagetmiscname(old_np, ep) == NULL) { 5300 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5301 return (-1); 5302 else { 5303 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5304 mdclrerror(ep); 5305 return (0); 5306 } 5307 } 5308 5309 /* name exists, ask the user for a new one */ 5310 (void) printf(dgettext(TEXT_DOMAIN, 5311 "WARNING: A soft partition named %s was found in the extent\n" 5312 "headers, but this name already exists in the metadb " 5313 "configuration.\n" 5314 "In order to continue recovery you must supply\n" 5315 "a new name for this soft partition.\n"), old_np->cname); 5316 (void) printf(dgettext(TEXT_DOMAIN, 5317 "Would you like to continue and supply a new name? (yes/no) ")); 5318 5319 (void) fflush(stdout); 5320 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5321 (strlen(yesno) == 1)) 5322 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5323 dgettext(TEXT_DOMAIN, "no")); 5324 yes = dgettext(TEXT_DOMAIN, "yes"); 5325 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5326 return (-1); 5327 } 5328 5329 (void) fflush(stdin); 5330 5331 /* get the new name */ 5332 for (;;) { 5333 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5334 "for this soft partition (dXXXX) ")); 5335 (void) fflush(stdout); 5336 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5337 (void) strcpy(newname, ""); 5338 5339 /* remove newline character */ 5340 if (newname[strlen(newname) - 1] == '\n') 5341 newname[strlen(newname) - 1] = '\0'; 5342 5343 if (!(is_metaname(newname)) || 5344 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5345 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5346 "Invalid metadevice name\n")); 5347 (void) fflush(stderr); 5348 continue; 5349 } 5350 5351 if ((*new_np = metaname(&sp, newname, 5352 META_DEVICE, ep)) == NULL) { 5353 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5354 "Invalid metadevice name\n")); 5355 (void) fflush(stderr); 5356 continue; 5357 } 5358 5359 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5360 /* make sure the name isn't already being used */ 5361 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5362 metagetmiscname(*new_np, ep) != NULL) { 5363 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5364 "That name already exists\n")); 5365 continue; 5366 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5367 return (-1); 5368 5369 break; 5370 } 5371 5372 /* got a new name, place in used array and return */ 5373 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5374 mdclrerror(ep); 5375 return (1); 5376 } 5377 5378 /* 5379 * FUNCTION: meta_sp_validate_wm() 5380 * INPUT: sp - set name we are recovering in 5381 * compnp - name pointer for device we are recovering from 5382 * options - metarecover options 5383 * OUTPUT: ep - error pointer returned 5384 * RETURNS: int - 0 - success, -1 - error 5385 * PURPOSE: validate and display watermark configuration. walk the 5386 * on-disk watermark structures and validate the information 5387 * found within. since a watermark configuration is 5388 * "self-defining", the act of traversing the watermarks 5389 * is part of the validation process. 5390 */ 5391 static int 5392 meta_sp_validate_wm( 5393 mdsetname_t *sp, 5394 mdname_t *compnp, 5395 mdcmdopts_t options, 5396 md_error_t *ep 5397 ) 5398 { 5399 sp_ext_node_t *extlist = NULL; 5400 sp_ext_node_t *ext; 5401 int num_sps = 0; 5402 int rval; 5403 5404 if ((options & MDCMD_VERBOSE) != 0) 5405 (void) printf(dgettext(TEXT_DOMAIN, 5406 "Verifying on-disk structures on %s.\n"), 5407 compnp->cname); 5408 5409 /* 5410 * for each watermark, build an ext_node, place on list. 5411 */ 5412 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5413 meta_sp_cmp_by_nameseq, ep); 5414 5415 if ((options & MDCMD_VERBOSE) != 0) { 5416 /* print out what we found */ 5417 if (extlist == NULL) 5418 (void) printf(dgettext(TEXT_DOMAIN, 5419 "No extent headers found on %s.\n"), 5420 compnp->cname); 5421 else { 5422 (void) printf(dgettext(TEXT_DOMAIN, 5423 "The following extent headers were found on %s.\n"), 5424 compnp->cname); 5425 meta_sp_display_exthdr(); 5426 } 5427 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5428 meta_sp_display_ext(ext); 5429 } 5430 5431 if (rval < 0) { 5432 (void) printf(dgettext(TEXT_DOMAIN, 5433 "%s: On-disk structures invalid or " 5434 "no soft partitions found.\n"), 5435 compnp->cname); 5436 return (-1); 5437 } 5438 5439 assert(extlist != NULL); 5440 5441 /* count number of soft partitions */ 5442 for (ext = extlist; 5443 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5444 ext = ext->ext_next) { 5445 if (ext->ext_next != NULL && 5446 ext->ext_next->ext_namep != NULL && 5447 strcmp(ext->ext_next->ext_namep->cname, 5448 ext->ext_namep->cname) == 0) 5449 continue; 5450 num_sps++; 5451 } 5452 5453 if ((options & MDCMD_VERBOSE) != 0) 5454 (void) printf(dgettext(TEXT_DOMAIN, 5455 "Found %d soft partition(s) on %s.\n"), num_sps, 5456 compnp->cname); 5457 5458 if (num_sps == 0) { 5459 (void) printf(dgettext(TEXT_DOMAIN, 5460 "%s: No soft partitions.\n"), compnp->cname); 5461 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5462 } 5463 5464 /* check sequence numbers */ 5465 if ((options & MDCMD_VERBOSE) != 0) 5466 (void) printf(dgettext(TEXT_DOMAIN, 5467 "Checking sequence numbers.\n")); 5468 5469 if (meta_sp_checkseq(extlist) != 0) 5470 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5471 5472 return (0); 5473 } 5474 5475 /* 5476 * FUNCTION: meta_sp_validate_unit() 5477 * INPUT: sp - name of set we are recovering in 5478 * compnp - name of component we are recovering from 5479 * options - metarecover options 5480 * OUTPUT: ep - error pointer returned 5481 * RETURNS: int - 0 - success, -1 - error 5482 * PURPOSE: validate and display metadb configuration. begin by getting 5483 * all soft partitions built on the specified component. get 5484 * the unit structure for each one and validate the fields within. 5485 */ 5486 static int 5487 meta_sp_validate_unit( 5488 mdsetname_t *sp, 5489 mdname_t *compnp, 5490 mdcmdopts_t options, 5491 md_error_t *ep 5492 ) 5493 { 5494 md_sp_t *msp; 5495 mdnamelist_t *spnlp = NULL; 5496 mdnamelist_t *namep = NULL; 5497 int count; 5498 uint_t extn; 5499 sp_ext_length_t size; 5500 5501 if ((options & MDCMD_VERBOSE) != 0) 5502 (void) printf(dgettext(TEXT_DOMAIN, 5503 "%s: Validating soft partition metadb entries.\n"), 5504 compnp->cname); 5505 5506 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5507 return (-1); 5508 5509 /* get all soft partitions on component */ 5510 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5511 5512 if (count == 0) { 5513 (void) printf(dgettext(TEXT_DOMAIN, 5514 "%s: No soft partitions.\n"), compnp->cname); 5515 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5516 } else if (count < 0) { 5517 return (-1); 5518 } 5519 5520 /* Now go through the soft partitions and check each one */ 5521 for (namep = spnlp; namep != NULL; namep = namep->next) { 5522 mdname_t *curnp = namep->namep; 5523 sp_ext_offset_t curvoff; 5524 5525 /* get the unit structure */ 5526 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5527 return (-1); 5528 5529 /* verify generic unit structure parameters */ 5530 if ((options & MDCMD_VERBOSE) != 0) 5531 (void) printf(dgettext(TEXT_DOMAIN, 5532 "\nVerifying device %s.\n"), 5533 curnp->cname); 5534 5535 /* 5536 * MD_SP_LAST is an invalid state and is always the 5537 * highest numbered. 5538 */ 5539 if (msp->status >= MD_SP_LAST) { 5540 (void) printf(dgettext(TEXT_DOMAIN, 5541 "%s: status value %u is out of range.\n"), 5542 curnp->cname, msp->status); 5543 return (mdmderror(ep, MDE_RECOVER_FAILED, 5544 0, curnp->cname)); 5545 } else if ((options & MDCMD_VERBOSE) != 0) { 5546 uint_t tstate = 0; 5547 5548 if (metaismeta(msp->compnamep)) { 5549 if (meta_get_tstate(msp->common.namep->dev, 5550 &tstate, ep) != 0) 5551 return (-1); 5552 } 5553 (void) printf(dgettext(TEXT_DOMAIN, 5554 "%s: Status \"%s\" is valid.\n"), 5555 curnp->cname, meta_sp_status_to_name(msp->status, 5556 tstate & MD_DEV_ERRORED)); 5557 } 5558 5559 /* Now verify each extent */ 5560 if ((options & MDCMD_VERBOSE) != 0) 5561 (void) printf("%14s %21s %21s %21s\n", 5562 dgettext(TEXT_DOMAIN, "Extent Number"), 5563 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5564 dgettext(TEXT_DOMAIN, "Physical Offset"), 5565 dgettext(TEXT_DOMAIN, "Length")); 5566 5567 curvoff = 0ULL; 5568 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5569 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5570 5571 if ((options & MDCMD_VERBOSE) != 0) 5572 (void) printf("%14u %21llu %21llu %21llu\n", 5573 extn, extp->voff, extp->poff, extp->len); 5574 5575 if (extp->voff != curvoff) { 5576 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5577 "%s: virtual offset for extent %u " 5578 "is inconsistent, expected %llu, " 5579 "got %llu.\n"), curnp->cname, extn, 5580 curvoff, extp->voff); 5581 return (mdmderror(ep, MDE_RECOVER_FAILED, 5582 0, compnp->cname)); 5583 } 5584 5585 /* make sure extent does not drop off the end */ 5586 if ((extp->poff + extp->len) == size) { 5587 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5588 "%s: extent %u at offset %llu, " 5589 "length %llu exceeds the size of the " 5590 "device, %llu.\n"), curnp->cname, 5591 extn, extp->poff, extp->len, size); 5592 return (mdmderror(ep, MDE_RECOVER_FAILED, 5593 0, compnp->cname)); 5594 } 5595 5596 curvoff += extp->len; 5597 } 5598 } 5599 if (options & MDCMD_PRINT) { 5600 (void) printf(dgettext(TEXT_DOMAIN, 5601 "%s: Soft Partition metadb configuration is valid\n"), 5602 compnp->cname); 5603 } 5604 return (0); 5605 } 5606 5607 /* 5608 * FUNCTION: meta_sp_validate_wm_and_unit() 5609 * INPUT: sp - name of set we are recovering in 5610 * compnp - name of device we are recovering from 5611 * options - metarecover options 5612 * OUTPUT: ep - error pointer returned 5613 * RETURNS: int - 0 - success, -1 error 5614 * PURPOSE: cross-validate and display watermarks and metadb records. 5615 * get both the unit structures for the soft partitions built 5616 * on the specified component and the watermarks found on that 5617 * component and check to make sure they are consistent with 5618 * each other. 5619 */ 5620 static int 5621 meta_sp_validate_wm_and_unit( 5622 mdsetname_t *sp, 5623 mdname_t *np, 5624 mdcmdopts_t options, 5625 md_error_t *ep 5626 ) 5627 { 5628 sp_ext_node_t *wmlist = NULL; 5629 sp_ext_node_t *unitlist = NULL; 5630 sp_ext_node_t *unitext; 5631 sp_ext_node_t *wmext; 5632 sp_ext_offset_t tmpunitoff; 5633 mdnamelist_t *spnlp = NULL; 5634 int count; 5635 int rval = 0; 5636 int verbose = (options & MDCMD_VERBOSE); 5637 5638 /* get unit structure list */ 5639 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5640 if (count <= 0) 5641 return (-1); 5642 5643 meta_sp_list_insert(NULL, NULL, &unitlist, 5644 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5645 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5646 5647 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5648 metafreenamelist(spnlp); 5649 return (-1); 5650 } 5651 5652 metafreenamelist(spnlp); 5653 5654 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5655 5656 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5657 meta_sp_cmp_by_offset, ep) < 0) { 5658 meta_sp_list_free(&unitlist); 5659 return (-1); 5660 } 5661 5662 if (getenv(META_SP_DEBUG)) { 5663 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5664 meta_sp_list_dump(unitlist); 5665 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5666 meta_sp_list_dump(wmlist); 5667 } 5668 5669 /* 5670 * step through both lists and compare allocated nodes. Free 5671 * nodes and end watermarks may differ between the two but 5672 * that's generally ok, and if they're wrong will typically 5673 * cause misplaced allocated extents. 5674 */ 5675 if (verbose) 5676 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5677 "allocations match extent headers.\n"), np->cname); 5678 5679 unitext = unitlist; 5680 wmext = wmlist; 5681 while ((wmext != NULL) && (unitext != NULL)) { 5682 /* find next allocated extents in each list */ 5683 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5684 wmext = wmext->ext_next; 5685 5686 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5687 unitext = unitext->ext_next; 5688 5689 if (wmext == NULL || unitext == NULL) 5690 break; 5691 5692 if (verbose) { 5693 (void) printf(dgettext(TEXT_DOMAIN, 5694 "Metadb extent:\n")); 5695 meta_sp_display_exthdr(); 5696 meta_sp_display_ext(unitext); 5697 (void) printf(dgettext(TEXT_DOMAIN, 5698 "Extent header extent:\n")); 5699 meta_sp_display_exthdr(); 5700 meta_sp_display_ext(wmext); 5701 (void) printf("\n"); 5702 } 5703 5704 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5705 rval = -1; 5706 5707 /* 5708 * if the offsets aren't equal, only increment the 5709 * lowest one in hopes of getting the lists back in sync. 5710 */ 5711 tmpunitoff = unitext->ext_offset; 5712 if (unitext->ext_offset <= wmext->ext_offset) 5713 unitext = unitext->ext_next; 5714 if (wmext->ext_offset <= tmpunitoff) 5715 wmext = wmext->ext_next; 5716 } 5717 5718 /* 5719 * if both lists aren't at the end then there are extra 5720 * allocated nodes in one of them. 5721 */ 5722 if (wmext != NULL) { 5723 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5724 "%s: extent headers contain allocations not in " 5725 "the metadb\n\n"), np->cname); 5726 rval = -1; 5727 } 5728 5729 if (unitext != NULL) { 5730 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5731 "%s: metadb contains allocations not in the extent " 5732 "headers\n\n"), np->cname); 5733 rval = -1; 5734 } 5735 5736 if (options & MDCMD_PRINT) { 5737 if (rval == 0) { 5738 (void) printf(dgettext(TEXT_DOMAIN, 5739 "%s: Soft Partition metadb matches extent " 5740 "header configuration\n"), np->cname); 5741 } else { 5742 (void) printf(dgettext(TEXT_DOMAIN, 5743 "%s: Soft Partition metadb does not match extent " 5744 "header configuration\n"), np->cname); 5745 } 5746 } 5747 5748 return (rval); 5749 } 5750 5751 /* 5752 * FUNCTION: meta_sp_validate_exts() 5753 * INPUT: compnp - name pointer for device we are recovering from 5754 * wmext - extent node representing watermark 5755 * unitext - extent node from unit structure 5756 * OUTPUT: ep - return error pointer 5757 * RETURNS: int - 0 - succes, mdmderror return code - error 5758 * PURPOSE: Takes two extent nodes and checks them against each other. 5759 * offset, length, sequence number, set, and name are compared. 5760 */ 5761 static int 5762 meta_sp_validate_exts( 5763 mdname_t *compnp, 5764 sp_ext_node_t *wmext, 5765 sp_ext_node_t *unitext, 5766 md_error_t *ep 5767 ) 5768 { 5769 if (wmext->ext_offset != unitext->ext_offset) { 5770 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5771 "%s: unit structure and extent header offsets differ.\n"), 5772 compnp->cname); 5773 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5774 } 5775 5776 if (wmext->ext_length != unitext->ext_length) { 5777 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5778 "%s: unit structure and extent header lengths differ.\n"), 5779 compnp->cname); 5780 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5781 } 5782 5783 if (wmext->ext_seq != unitext->ext_seq) { 5784 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5785 "%s: unit structure and extent header sequence numbers " 5786 "differ.\n"), compnp->cname); 5787 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5788 } 5789 5790 if (wmext->ext_type != unitext->ext_type) { 5791 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5792 "%s: unit structure and extent header types differ.\n"), 5793 compnp->cname); 5794 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5795 } 5796 5797 /* 5798 * If one has a set pointer and the other doesn't, error. 5799 * If both extents have setnames, then make sure they match 5800 * If both are NULL, it's ok, they match. 5801 */ 5802 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5803 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5804 "%s: unit structure and extent header set values " 5805 "differ.\n"), compnp->cname); 5806 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5807 } 5808 5809 if (unitext->ext_setp != NULL) { 5810 if (strcmp(unitext->ext_setp->setname, 5811 wmext->ext_setp->setname) != 0) { 5812 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5813 "%s: unit structure and extent header set names " 5814 "differ.\n"), compnp->cname); 5815 return (mdmderror(ep, MDE_RECOVER_FAILED, 5816 0, compnp->cname)); 5817 } 5818 } 5819 5820 /* 5821 * If one has a name pointer and the other doesn't, error. 5822 * If both extents have names, then make sure they match 5823 * If both are NULL, it's ok, they match. 5824 */ 5825 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5826 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5827 "%s: unit structure and extent header name values " 5828 "differ.\n"), compnp->cname); 5829 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5830 } 5831 5832 if (unitext->ext_namep != NULL) { 5833 if (strcmp(wmext->ext_namep->cname, 5834 unitext->ext_namep->cname) != 0) { 5835 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5836 "%s: unit structure and extent header names " 5837 "differ.\n"), compnp->cname); 5838 return (mdmderror(ep, MDE_RECOVER_FAILED, 5839 0, compnp->cname)); 5840 } 5841 } 5842 5843 return (0); 5844 } 5845 5846 /* 5847 * FUNCTION: update_sp_status() 5848 * INPUT: sp - name of set we are recovering in 5849 * minors - pointer to an array of soft partition minor numbers 5850 * num_sps - number of minor numbers in array 5851 * status - new status to be applied to all soft parts in array 5852 * mn_set - set if current set is a multi-node set 5853 * OUTPUT: ep - return error pointer 5854 * RETURNS: int - 0 - success, -1 - error 5855 * PURPOSE: update status of soft partitions to new status. minors is an 5856 * array of minor numbers to apply the new status to. 5857 * If mn_set is set, a message is sent to all nodes in the 5858 * cluster to update the status locally. 5859 */ 5860 static int 5861 update_sp_status( 5862 mdsetname_t *sp, 5863 minor_t *minors, 5864 int num_sps, 5865 sp_status_t status, 5866 bool_t mn_set, 5867 md_error_t *ep 5868 ) 5869 { 5870 int i; 5871 int err = 0; 5872 5873 if (mn_set) { 5874 md_mn_msg_sp_setstat_t sp_setstat_params; 5875 int result; 5876 md_mn_result_t *resp = NULL; 5877 5878 for (i = 0; i < num_sps; i++) { 5879 sp_setstat_params.sp_setstat_mnum = minors[i]; 5880 sp_setstat_params.sp_setstat_status = status; 5881 5882 result = mdmn_send_message(sp->setno, 5883 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 5884 (char *)&sp_setstat_params, 5885 sizeof (sp_setstat_params), 5886 &resp, ep); 5887 if (resp != NULL) { 5888 if (resp->mmr_exitval != 0) 5889 err = -1; 5890 free_result(resp); 5891 } 5892 if (result != 0) { 5893 err = -1; 5894 } 5895 } 5896 } else { 5897 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5898 err = -1; 5899 } 5900 if (err < 0) { 5901 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5902 "Error updating status on recovered soft " 5903 "partitions.\n")); 5904 } 5905 return (err); 5906 } 5907 5908 /* 5909 * FUNCTION: meta_sp_recover_from_wm() 5910 * INPUT: sp - name of set we are recovering in 5911 * compnp - name pointer for component we are recovering from 5912 * options - metarecover options 5913 * OUTPUT: ep - return error pointer 5914 * RETURNS: int - 0 - success, -1 - error 5915 * PURPOSE: update metadb records to match watermarks. begin by getting 5916 * an extlist representing all soft partitions on the component. 5917 * then build a unit structure for each soft partition. 5918 * notify user of changes, then commit each soft partition to 5919 * the metadb one at a time in the "recovering" state. update 5920 * any watermarks that may need it (to reflect possible name 5921 * changes), and, finally, set the status of all recovered 5922 * partitions to the "OK" state at once. 5923 */ 5924 static int 5925 meta_sp_recover_from_wm( 5926 mdsetname_t *sp, 5927 mdname_t *compnp, 5928 mdcmdopts_t options, 5929 md_error_t *ep 5930 ) 5931 { 5932 sp_ext_node_t *extlist = NULL; 5933 sp_ext_node_t *sp_list = NULL; 5934 sp_ext_node_t *update_list = NULL; 5935 sp_ext_node_t *ext; 5936 sp_ext_node_t *sp_ext; 5937 mp_unit_t *mp; 5938 mp_unit_t **un_array; 5939 int numexts = 0, num_sps = 0, i = 0; 5940 int err = 0; 5941 int not_recovered = 0; 5942 int committed = 0; 5943 sp_ext_length_t sp_length = 0LL; 5944 mdnamelist_t *keynlp = NULL; 5945 mdname_t *np; 5946 mdname_t *new_np; 5947 int new_name; 5948 md_set_params_t set_params; 5949 minor_t *minors = NULL; 5950 char yesno[255]; 5951 char *yes; 5952 bool_t mn_set = 0; 5953 md_set_desc *sd; 5954 mm_unit_t *mm; 5955 md_set_mmown_params_t *ownpar = NULL; 5956 int comp_is_mirror = 0; 5957 5958 /* 5959 * if this component appears in another metadevice already, do 5960 * NOT recover from it. 5961 */ 5962 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5963 return (-1); 5964 5965 /* set flag if dealing with a MN set */ 5966 if (!metaislocalset(sp)) { 5967 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5968 return (-1); 5969 } 5970 if (MD_MNSET_DESC(sd)) 5971 mn_set = 1; 5972 } 5973 /* 5974 * for each watermark, build an ext_node, place on list. 5975 */ 5976 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5977 meta_sp_cmp_by_nameseq, ep) < 0) 5978 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5979 5980 assert(extlist != NULL); 5981 5982 /* count number of soft partitions */ 5983 for (ext = extlist; 5984 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5985 ext = ext->ext_next) { 5986 if (ext->ext_next != NULL && 5987 ext->ext_next->ext_namep != NULL && 5988 strcmp(ext->ext_next->ext_namep->cname, 5989 ext->ext_namep->cname) == 0) 5990 continue; 5991 num_sps++; 5992 } 5993 5994 /* allocate array of unit structure pointers */ 5995 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5996 5997 /* 5998 * build unit structures from list of ext_nodes. 5999 */ 6000 for (ext = extlist; 6001 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 6002 ext = ext->ext_next) { 6003 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 6004 &sp_list, ext->ext_offset, ext->ext_length, 6005 ext->ext_type, ext->ext_seq, ext->ext_flags, 6006 meta_sp_cmp_by_nameseq); 6007 6008 numexts++; 6009 sp_length += ext->ext_length - MD_SP_WMSIZE; 6010 6011 if (ext->ext_next != NULL && 6012 ext->ext_next->ext_namep != NULL && 6013 strcmp(ext->ext_next->ext_namep->cname, 6014 ext->ext_namep->cname) == 0) 6015 continue; 6016 6017 /* 6018 * if we made it here, we are at a soft partition 6019 * boundary in the list. 6020 */ 6021 if (getenv(META_SP_DEBUG)) { 6022 meta_sp_debug("meta_recover_from_wm: dumping wm " 6023 "list:\n"); 6024 meta_sp_list_dump(sp_list); 6025 } 6026 6027 assert(sp_list != NULL); 6028 assert(sp_list->ext_namep != NULL); 6029 6030 if ((new_name = meta_sp_resolve_name_conflict(sp, 6031 sp_list->ext_namep, &new_np, ep)) < 0) { 6032 err = 1; 6033 goto out; 6034 } else if (new_name) { 6035 for (sp_ext = sp_list; 6036 sp_ext != NULL; 6037 sp_ext = sp_ext->ext_next) { 6038 /* 6039 * insert into the update list for 6040 * watermark update. 6041 */ 6042 meta_sp_list_insert(sp_ext->ext_setp, 6043 new_np, &update_list, sp_ext->ext_offset, 6044 sp_ext->ext_length, sp_ext->ext_type, 6045 sp_ext->ext_seq, EXTFLG_UPDATE, 6046 meta_sp_cmp_by_offset); 6047 } 6048 6049 } 6050 if (options & MDCMD_DOIT) { 6051 /* store name in namespace */ 6052 if (mn_set) { 6053 /* send message to all nodes to return key */ 6054 md_mn_msg_addkeyname_t *send_params; 6055 int result; 6056 md_mn_result_t *resp = NULL; 6057 int message_size; 6058 6059 message_size = sizeof (*send_params) + 6060 strlen(compnp->cname) + 1; 6061 send_params = Zalloc(message_size); 6062 send_params->addkeyname_setno = sp->setno; 6063 (void) strcpy(&send_params->addkeyname_name[0], 6064 compnp->cname); 6065 result = mdmn_send_message(sp->setno, 6066 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6067 (char *)send_params, message_size, &resp, 6068 ep); 6069 Free(send_params); 6070 if (resp != NULL) { 6071 if (resp->mmr_exitval >= 0) { 6072 compnp->key = 6073 (mdkey_t)resp->mmr_exitval; 6074 } else { 6075 err = 1; 6076 free_result(resp); 6077 goto out; 6078 } 6079 free_result(resp); 6080 } 6081 if (result != 0) { 6082 err = 1; 6083 goto out; 6084 } 6085 (void) metanamelist_append(&keynlp, compnp); 6086 } else { 6087 if (add_key_name(sp, compnp, &keynlp, 6088 ep) != 0) { 6089 err = 1; 6090 goto out; 6091 } 6092 } 6093 } 6094 6095 /* create the unit structure */ 6096 if ((mp = meta_sp_createunit( 6097 (new_name) ? new_np : sp_list->ext_namep, compnp, 6098 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6099 err = 1; 6100 goto out; 6101 } 6102 6103 if (getenv(META_SP_DEBUG)) { 6104 meta_sp_debug("meta_sp_recover_from_wm: " 6105 "printing newly created unit structure"); 6106 meta_sp_printunit(mp); 6107 } 6108 6109 /* place in unit structure array */ 6110 un_array[i++] = mp; 6111 6112 /* free sp_list */ 6113 meta_sp_list_free(&sp_list); 6114 sp_list = NULL; 6115 numexts = 0; 6116 sp_length = 0LL; 6117 } 6118 6119 /* display configuration updates */ 6120 (void) printf(dgettext(TEXT_DOMAIN, 6121 "The following soft partitions were found and will be added to\n" 6122 "your metadevice configuration.\n")); 6123 (void) printf("%5s %15s %18s\n", 6124 dgettext(TEXT_DOMAIN, "Name"), 6125 dgettext(TEXT_DOMAIN, "Size"), 6126 dgettext(TEXT_DOMAIN, "No. of Extents")); 6127 for (i = 0; i < num_sps; i++) { 6128 (void) printf("%5s%lu %15llu %9d\n", "d", 6129 MD_MIN2UNIT(MD_SID(un_array[i])), 6130 un_array[i]->un_length, un_array[i]->un_numexts); 6131 } 6132 6133 if (!(options & MDCMD_DOIT)) { 6134 not_recovered = 1; 6135 goto out; 6136 } 6137 6138 /* ask user for confirmation */ 6139 (void) printf(dgettext(TEXT_DOMAIN, 6140 "WARNING: You are about to add one or more soft partition\n" 6141 "metadevices to your metadevice configuration. If there\n" 6142 "appears to be an error in the soft partition(s) displayed\n" 6143 "above, do NOT proceed with this recovery operation.\n")); 6144 (void) printf(dgettext(TEXT_DOMAIN, 6145 "Are you sure you want to do this (yes/no)? ")); 6146 6147 (void) fflush(stdout); 6148 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6149 (strlen(yesno) == 1)) 6150 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6151 dgettext(TEXT_DOMAIN, "no")); 6152 yes = dgettext(TEXT_DOMAIN, "yes"); 6153 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6154 not_recovered = 1; 6155 goto out; 6156 } 6157 6158 /* commit records one at a time */ 6159 for (i = 0; i < num_sps; i++) { 6160 (void) memset(&set_params, 0, sizeof (set_params)); 6161 set_params.mnum = MD_SID(un_array[i]); 6162 set_params.size = (un_array[i])->c.un_size; 6163 set_params.mdp = (uintptr_t)(un_array[i]); 6164 set_params.options = 6165 meta_check_devicesize(un_array[i]->un_length); 6166 if (set_params.options == MD_CRO_64BIT) { 6167 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6168 } else { 6169 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6170 } 6171 MD_SETDRIVERNAME(&set_params, MD_SP, 6172 MD_MIN2SET(set_params.mnum)); 6173 6174 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6175 6176 /* 6177 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6178 */ 6179 if (mn_set) { 6180 md_mn_msg_iocset_t send_params; 6181 int result; 6182 md_mn_result_t *resp = NULL; 6183 int mess_size; 6184 6185 /* 6186 * Calculate message size. md_mn_msg_iocset_t only 6187 * contains one extent, so increment the size to 6188 * include all extents 6189 */ 6190 mess_size = sizeof (send_params) - 6191 sizeof (mp_ext_t) + 6192 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6193 6194 send_params.iocset_params = set_params; 6195 (void) memcpy(&send_params.unit, un_array[i], 6196 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6197 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6198 result = mdmn_send_message(sp->setno, 6199 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 6200 (char *)&send_params, mess_size, &resp, 6201 ep); 6202 if (resp != NULL) { 6203 if (resp->mmr_exitval != 0) 6204 err = 1; 6205 free_result(resp); 6206 } 6207 if (result != 0) { 6208 err = 1; 6209 } 6210 } else { 6211 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6212 np->cname) != 0) { 6213 err = 1; 6214 } 6215 } 6216 6217 if (err == 1) { 6218 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6219 "%s: Error committing record to metadb.\n"), 6220 np->cname); 6221 goto out; 6222 } 6223 6224 /* note that we've committed a record */ 6225 if (!committed) 6226 committed = 1; 6227 6228 /* update any watermarks that need it */ 6229 if (update_list != NULL) { 6230 md_sp_t *msp; 6231 6232 /* 6233 * Check to see if we're trying to create a partition 6234 * on a mirror. If so we may have to enforce an 6235 * ownership change before writing the watermark out. 6236 */ 6237 if (metaismeta(compnp)) { 6238 char *miscname; 6239 6240 miscname = metagetmiscname(compnp, ep); 6241 if (miscname != NULL) 6242 comp_is_mirror = (strcmp(miscname, 6243 MD_MIRROR) == 0); 6244 else 6245 comp_is_mirror = 0; 6246 } 6247 /* 6248 * If this is a MN set and the component is a mirror, 6249 * change ownership to this node in order to write the 6250 * watermarks 6251 */ 6252 if (mn_set && comp_is_mirror) { 6253 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6254 if (mm == NULL) { 6255 err = 1; 6256 goto out; 6257 } else { 6258 err = meta_mn_change_owner(&ownpar, 6259 sp->setno, 6260 meta_getminor(compnp->dev), 6261 sd->sd_mn_mynode->nd_nodeid, 6262 MD_MN_MM_PREVENT_CHANGE | 6263 MD_MN_MM_SPAWN_THREAD); 6264 if (err != 0) 6265 goto out; 6266 } 6267 } 6268 6269 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6270 err = 1; 6271 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6272 "%s: Error updating extent headers.\n"), 6273 np->cname); 6274 goto out; 6275 } 6276 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6277 err = 1; 6278 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6279 "%s: Error updating extent headers " 6280 "on disk.\n"), np->cname); 6281 goto out; 6282 } 6283 } 6284 /* 6285 * If we have changed ownership earlier and prevented any 6286 * ownership changes, we can now allow ownership changes 6287 * again. 6288 */ 6289 if (ownpar) { 6290 (void) meta_mn_change_owner(&ownpar, sp->setno, 6291 ownpar->d.mnum, 6292 ownpar->d.owner, 6293 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6294 } 6295 } 6296 6297 /* update status of all soft partitions to OK */ 6298 minors = Zalloc(num_sps * sizeof (minor_t)); 6299 for (i = 0; i < num_sps; i++) 6300 minors[i] = MD_SID(un_array[i]); 6301 6302 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6303 if (err != 0) 6304 goto out; 6305 6306 if (options & MDCMD_PRINT) 6307 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6308 "Soft Partitions recovered from device.\n"), 6309 compnp->cname); 6310 out: 6311 /* free memory */ 6312 if (extlist != NULL) 6313 meta_sp_list_free(&extlist); 6314 if (sp_list != NULL) 6315 meta_sp_list_free(&sp_list); 6316 if (update_list != NULL) 6317 meta_sp_list_free(&update_list); 6318 if (un_array != NULL) { 6319 for (i = 0; i < num_sps; i++) 6320 Free(un_array[i]); 6321 Free(un_array); 6322 } 6323 if (minors != NULL) 6324 Free(minors); 6325 if (ownpar != NULL) 6326 Free(ownpar); 6327 (void) fflush(stdout); 6328 6329 if ((keynlp != NULL) && (committed != 1)) { 6330 /* 6331 * if we haven't committed any softparts, either because of an 6332 * error or because the user decided not to proceed, delete 6333 * namelist key for the component 6334 */ 6335 if (mn_set) { 6336 mdnamelist_t *p; 6337 6338 for (p = keynlp; (p != NULL); p = p->next) { 6339 mdname_t *np = p->namep; 6340 md_mn_msg_delkeyname_t send_params; 6341 md_mn_result_t *resp = NULL; 6342 6343 send_params.delkeyname_dev = np->dev; 6344 send_params.delkeyname_setno = sp->setno; 6345 send_params.delkeyname_key = np->key; 6346 (void) mdmn_send_message(sp->setno, 6347 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6348 (char *)&send_params, sizeof (send_params), 6349 &resp, ep); 6350 if (resp != NULL) { 6351 free_result(resp); 6352 } 6353 } 6354 } else { 6355 (void) del_key_names(sp, keynlp, NULL); 6356 } 6357 } 6358 6359 metafreenamelist(keynlp); 6360 6361 if (err) 6362 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6363 6364 if (not_recovered) 6365 if (options & MDCMD_PRINT) 6366 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6367 "Soft Partitions NOT recovered from device.\n"), 6368 compnp->cname); 6369 return (0); 6370 } 6371 6372 /* 6373 * FUNCTION: meta_sp_recover_from_unit() 6374 * INPUT: sp - name of set we are recovering in 6375 * compnp - name of component we are recovering from 6376 * options - metarecover options 6377 * OUTPUT: ep - return error pointer 6378 * RETURNS: int - 0 - success, -1 - error 6379 * PURPOSE: update watermarks to match metadb records. begin by getting 6380 * a namelist representing all soft partitions on the specified 6381 * component. then, build an extlist representing the soft 6382 * partitions, filling in the freespace extents. notify user 6383 * of changes, place all soft partitions into the "recovering" 6384 * state and update the watermarks. finally, return all soft 6385 * partitions to the "OK" state. 6386 */ 6387 static int 6388 meta_sp_recover_from_unit( 6389 mdsetname_t *sp, 6390 mdname_t *compnp, 6391 mdcmdopts_t options, 6392 md_error_t *ep 6393 ) 6394 { 6395 mdnamelist_t *spnlp = NULL; 6396 mdnamelist_t *nlp = NULL; 6397 sp_ext_node_t *ext = NULL; 6398 sp_ext_node_t *extlist = NULL; 6399 int count; 6400 char yesno[255]; 6401 char *yes; 6402 int rval = 0; 6403 minor_t *minors = NULL; 6404 int i; 6405 md_sp_t *msp; 6406 md_set_desc *sd; 6407 bool_t mn_set = 0; 6408 daddr_t start_block; 6409 6410 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6411 if (count <= 0) 6412 return (-1); 6413 6414 /* set flag if dealing with a MN set */ 6415 if (!metaislocalset(sp)) { 6416 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6417 return (-1); 6418 } 6419 if (MD_MNSET_DESC(sd)) 6420 mn_set = 1; 6421 } 6422 /* 6423 * Save the XDR unit structure for one of the soft partitions; 6424 * we'll use this later to provide metadevice context to 6425 * update the watermarks so the device can be resolved by 6426 * devid instead of dev_t. 6427 */ 6428 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6429 metafreenamelist(spnlp); 6430 return (-1); 6431 } 6432 6433 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6434 MD_DISKADDR_ERROR) { 6435 return (-1); 6436 } 6437 6438 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6439 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6440 meta_sp_list_insert(NULL, NULL, &extlist, 6441 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6442 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6443 6444 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6445 metafreenamelist(spnlp); 6446 return (-1); 6447 } 6448 6449 assert(extlist != NULL); 6450 if ((options & MDCMD_VERBOSE) != 0) { 6451 (void) printf(dgettext(TEXT_DOMAIN, 6452 "Updating extent headers on device %s from metadb.\n\n"), 6453 compnp->cname); 6454 (void) printf(dgettext(TEXT_DOMAIN, 6455 "The following extent headers will be written:\n")); 6456 meta_sp_display_exthdr(); 6457 } 6458 6459 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6460 6461 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6462 6463 /* mark every node for updating except the reserved space */ 6464 if (ext->ext_type != EXTTYP_RESERVED) { 6465 ext->ext_flags |= EXTFLG_UPDATE; 6466 6467 /* print extent information */ 6468 if ((options & MDCMD_VERBOSE) != 0) 6469 meta_sp_display_ext(ext); 6470 } 6471 } 6472 6473 /* request verification and then update all watermarks */ 6474 if ((options & MDCMD_DOIT) != 0) { 6475 6476 (void) printf(dgettext(TEXT_DOMAIN, 6477 "\nWARNING: You are about to overwrite portions of %s\n" 6478 "with soft partition metadata. The extent headers will be\n" 6479 "written to match the existing metadb configuration. If\n" 6480 "the device was not previously setup with this\n" 6481 "configuration, data loss may result.\n\n"), 6482 compnp->cname); 6483 (void) printf(dgettext(TEXT_DOMAIN, 6484 "Are you sure you want to do this (yes/no)? ")); 6485 6486 (void) fflush(stdout); 6487 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6488 (strlen(yesno) == 1)) 6489 (void) snprintf(yesno, sizeof (yesno), 6490 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6491 yes = dgettext(TEXT_DOMAIN, "yes"); 6492 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6493 /* place soft partitions into recovering state */ 6494 minors = Zalloc(count * sizeof (minor_t)); 6495 for (nlp = spnlp, i = 0; 6496 nlp != NULL && i < count; 6497 nlp = nlp->next, i++) { 6498 assert(nlp->namep != NULL); 6499 minors[i] = meta_getminor(nlp->namep->dev); 6500 } 6501 if (update_sp_status(sp, minors, count, 6502 MD_SP_RECOVER, mn_set, ep) != 0) { 6503 rval = -1; 6504 goto out; 6505 } 6506 6507 /* update the watermarks */ 6508 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6509 rval = -1; 6510 goto out; 6511 } 6512 6513 if (options & MDCMD_PRINT) { 6514 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6515 "Soft Partitions recovered from metadb\n"), 6516 compnp->cname); 6517 } 6518 6519 /* return soft partitions to the OK state */ 6520 if (update_sp_status(sp, minors, count, 6521 MD_SP_OK, mn_set, ep) != 0) { 6522 rval = -1; 6523 goto out; 6524 } 6525 6526 rval = 0; 6527 goto out; 6528 } 6529 } 6530 6531 if (options & MDCMD_PRINT) { 6532 (void) printf(dgettext(TEXT_DOMAIN, 6533 "%s: Soft Partitions NOT recovered from metadb\n"), 6534 compnp->cname); 6535 } 6536 6537 out: 6538 if (minors != NULL) 6539 Free(minors); 6540 metafreenamelist(spnlp); 6541 meta_sp_list_free(&extlist); 6542 (void) fflush(stdout); 6543 return (rval); 6544 } 6545 6546 6547 /* 6548 * FUNCTION: meta_sp_update_abr() 6549 * INPUT: sp - name of set we are recovering in 6550 * OUTPUT: ep - return error pointer 6551 * RETURNS: int - 0 - success, -1 - error 6552 * PURPOSE: update the ABR state for all soft partitions in the set. This 6553 * is called when joining a set. It sends a message to the master 6554 * node for each soft partition to get the value of tstate and 6555 * then sets ABR ,if required, by opening the sp, setting ABR 6556 * and then closing the sp. This approach is taken rather that 6557 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6558 * the case when we have another node simultaneously unsetting ABR. 6559 */ 6560 int 6561 meta_sp_update_abr( 6562 mdsetname_t *sp, 6563 md_error_t *ep 6564 ) 6565 { 6566 mdnamelist_t *devnlp = NULL; 6567 mdnamelist_t *p; 6568 mdname_t *devnp = NULL; 6569 md_unit_t *un; 6570 char fname[MAXPATHLEN]; 6571 int mnum, fd; 6572 volcap_t vc; 6573 uint_t tstate; 6574 6575 6576 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6577 return (-1); 6578 } 6579 6580 /* Exit if no soft partitions in this set */ 6581 if (devnlp == NULL) 6582 return (0); 6583 6584 /* For each soft partition */ 6585 for (p = devnlp; (p != NULL); p = p->next) { 6586 devnp = p->namep; 6587 6588 /* check if this is a top level metadevice */ 6589 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6590 goto out; 6591 if (MD_HAS_PARENT(MD_PARENT(un))) { 6592 Free(un); 6593 continue; 6594 } 6595 Free(un); 6596 6597 /* Get tstate from Master */ 6598 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6599 mdname_t *np; 6600 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6601 ep); 6602 if (np) { 6603 md_perror(dgettext(TEXT_DOMAIN, 6604 "Unable to get tstate for %s"), np->cname); 6605 } 6606 continue; 6607 } 6608 /* If not set on the master, nothing to do */ 6609 if (!(tstate & MD_ABR_CAP)) 6610 continue; 6611 6612 mnum = meta_getminor(devnp->dev); 6613 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6614 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6615 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6616 md_perror(dgettext(TEXT_DOMAIN, 6617 "Could not open device %s"), fname); 6618 continue; 6619 } 6620 6621 /* Set ABR state */ 6622 vc.vc_info = 0; 6623 vc.vc_set = 0; 6624 if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) { 6625 (void) close(fd); 6626 continue; 6627 } 6628 6629 vc.vc_set = DKV_ABR_CAP; 6630 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6631 (void) close(fd); 6632 goto out; 6633 } 6634 6635 (void) close(fd); 6636 } 6637 metafreenamelist(devnlp); 6638 return (0); 6639 out: 6640 metafreenamelist(devnlp); 6641 return (-1); 6642 } 6643 6644 /* 6645 * FUNCTION: meta_mn_sp_update_abr() 6646 * INPUT: arg - Given set. 6647 * PURPOSE: update the ABR state for all soft partitions in the set by 6648 * forking a process to call meta_sp_update_abr() 6649 * This function is only called via rpc.metad when adding a node 6650 * to a set, ie this node is beong joined to the set by another 6651 * node. 6652 */ 6653 void * 6654 meta_mn_sp_update_abr(void *arg) 6655 { 6656 set_t setno = *((set_t *)arg); 6657 mdsetname_t *sp; 6658 md_error_t mde = mdnullerror; 6659 int fval; 6660 6661 /* should have a set */ 6662 assert(setno != NULL); 6663 6664 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6665 mde_perror(&mde, ""); 6666 return (NULL); 6667 } 6668 6669 if (!(meta_is_mn_set(sp, &mde))) { 6670 mde_perror(&mde, ""); 6671 return (NULL); 6672 } 6673 6674 /* fork a process */ 6675 if ((fval = md_daemonize(sp, &mde)) != 0) { 6676 /* 6677 * md_daemonize will fork off a process. The is the 6678 * parent or error. 6679 */ 6680 if (fval > 0) { 6681 return (NULL); 6682 } 6683 mde_perror(&mde, ""); 6684 return (NULL); 6685 } 6686 /* 6687 * Child process should never return back to rpc.metad, but 6688 * should exit. 6689 * Flush all internally cached data inherited from parent process 6690 * since cached data will be cleared when parent process RPC request 6691 * has completed (which is possibly before this child process 6692 * can complete). 6693 * Child process can retrieve and cache its own copy of data from 6694 * rpc.metad that won't be changed by the parent process. 6695 * 6696 * Reset md_in_daemon since this child will be a client of rpc.metad 6697 * not part of the rpc.metad daemon itself. 6698 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6699 * this thread is rpc.metad or any other thread. (If this thread 6700 * was rpc.metad it could use some short circuit code to get data 6701 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6702 */ 6703 md_in_daemon = 0; 6704 metaflushsetname(sp); 6705 sr_cache_flush_setno(setno); 6706 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6707 mde_perror(&mde, ""); 6708 md_exit(sp, 1); 6709 } 6710 6711 6712 /* 6713 * Closing stdin/out/err here. 6714 */ 6715 (void) close(0); 6716 (void) close(1); 6717 (void) close(2); 6718 assert(fval == 0); 6719 6720 (void) meta_sp_update_abr(sp, &mde); 6721 6722 md_exit(sp, 0); 6723 /*NOTREACHED*/ 6724 return (NULL); 6725 } 6726