1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Just in case we're not in a build environment, make sure that 29 * TEXT_DOMAIN gets set to something. 30 */ 31 #if !defined(TEXT_DOMAIN) 32 #define TEXT_DOMAIN "SYS_TEST" 33 #endif 34 35 /* 36 * soft partition operations 37 * 38 * Soft Partitions provide a virtual disk mechanism which is used to 39 * divide a large volume into many small pieces, each appearing as a 40 * separate device. A soft partition consists of a series of extents, 41 * each having an offset and a length. The extents are logically 42 * contiguous, so where the first extent leaves off the second extent 43 * picks up. Which extent a given "virtual offset" belongs to is 44 * dependent on the size of all the previous extents in the soft 45 * partition. 46 * 47 * Soft partitions are represented in memory by an extent node 48 * (sp_ext_node_t) which contains all of the information necessary to 49 * create a unit structure and update the on-disk format, called 50 * "watermarks". These extent nodes are typically kept in a doubly 51 * linked list and are manipulated by list manipulation routines. A 52 * list of extents may represent all of the soft partitions on a volume, 53 * a single soft partition, or perhaps just a set of extents that need 54 * to be updated. Extent lists may be sorted by extent or by name/seq#, 55 * depending on which compare function is used. Most of the routines 56 * require the list be sorted by offset to work, and that's the typical 57 * configuration. 58 * 59 * In order to do an allocation, knowledge of all soft partitions on the 60 * volume is required. Then free space is determined from the space 61 * that is not allocated, and new allocations can be made from the free 62 * space. Once the new allocations are made, a unit structure is created 63 * and the watermarks are updated. The status is then changed to "okay" 64 * on the unit structure to commit the transaction. If updating the 65 * watermarks fails, the unit structure is in an intermediate state and 66 * the driver will not allow access to the device. 67 * 68 * A typical sequence of events is: 69 * 1. Fetch the list of names for all soft partitions on a volume 70 * meta_sp_get_by_component() 71 * 2. Construct an extent list from the name list 72 * meta_sp_extlist_from_namelist() 73 * 3. Fill the gaps in the extent list with free extents 74 * meta_sp_list_freefill() 75 * 4. Allocate from the free extents 76 * meta_sp_alloc_by_len() 77 * meta_sp_alloc_by_list() 78 * 5. Create the unit structure from the extent list 79 * meta_sp_createunit() 80 * meta_sp_updateunit() 81 * 6. Write out the watermarks 82 * meta_sp_update_wm() 83 * 7. Set the status to "Okay" 84 * meta_sp_setstatus() 85 * 86 */ 87 88 #include <stdio.h> 89 #include <meta.h> 90 #include "meta_repartition.h" 91 #include <sys/lvm/md_sp.h> 92 #include <sys/lvm/md_crc.h> 93 #include <strings.h> 94 #include <sys/lvm/md_mirror.h> 95 #include <sys/bitmap.h> 96 97 extern int md_in_daemon; 98 99 typedef struct sp_ext_node { 100 struct sp_ext_node *ext_next; /* next element */ 101 struct sp_ext_node *ext_prev; /* previous element */ 102 sp_ext_type_t ext_type; /* type of extent */ 103 sp_ext_offset_t ext_offset; /* starting offset */ 104 sp_ext_length_t ext_length; /* length of this node */ 105 uint_t ext_flags; /* extent flags */ 106 uint32_t ext_seq; /* watermark seq no */ 107 mdname_t *ext_namep; /* name pointer */ 108 mdsetname_t *ext_setp; /* set pointer */ 109 } sp_ext_node_t; 110 111 /* extent flags */ 112 #define EXTFLG_UPDATE (1) 113 114 /* Extent node compare function for list sorting */ 115 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 116 117 118 /* Function Prototypes */ 119 120 /* Debugging Functions */ 121 static void meta_sp_debug(char *format, ...); 122 static void meta_sp_printunit(mp_unit_t *mp); 123 124 /* Misc Support Functions */ 125 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 126 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 127 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 128 md_error_t *ep); 129 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 130 mdnamelist_t **nlpp, int force, md_error_t *ep); 131 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 132 mdname_t *compnp, md_error_t *ep); 133 134 /* Extent List Manipulation Functions */ 135 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 136 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 137 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 138 sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length, 139 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 140 static void meta_sp_list_free(sp_ext_node_t **head); 141 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 142 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 143 sp_ext_type_t exttype, int exclude_wm); 144 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 145 sp_ext_offset_t offset); 146 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 147 sp_ext_length_t size); 148 static void meta_sp_list_dump(sp_ext_node_t *head); 149 static int meta_sp_list_overlaps(sp_ext_node_t *head); 150 151 /* Extent List Query Functions */ 152 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 153 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 154 sp_ext_length_t alignment); 155 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 156 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 157 md_error_t *ep); 158 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 159 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 160 161 162 /* Extent Allocation Functions */ 163 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 164 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 165 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 166 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 167 sp_ext_node_t **extlist, sp_ext_length_t *lp, 168 sp_ext_offset_t last_off, sp_ext_length_t alignment); 169 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 170 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 171 172 /* Extent List Population Functions */ 173 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 174 sp_ext_node_t **extlist, md_error_t *ep); 175 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 176 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep); 177 178 /* Print (metastat) Functions */ 179 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 180 mdprtopts_t options, md_error_t *ep); 181 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 182 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 183 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 184 185 /* Watermark Manipulation Functions */ 186 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 187 sp_ext_node_t *extlist, md_error_t *ep); 188 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 189 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 190 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 191 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 192 md_error_t *ep); 193 194 /* Unit Structure Manipulation Functions */ 195 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 196 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 197 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 198 sp_status_t status, md_error_t *ep); 199 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 200 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 201 md_error_t *ep); 202 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 203 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 204 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 205 int *repart_options, md_error_t *ep); 206 207 /* Reset (metaclear) Functions */ 208 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 209 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 210 211 /* Recovery (metarecover) Functions */ 212 static void meta_sp_display_exthdr(void); 213 static void meta_sp_display_ext(sp_ext_node_t *ext); 214 static int meta_sp_checkseq(sp_ext_node_t *extlist); 215 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 216 mdname_t **, md_error_t *); 217 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 218 mdcmdopts_t options, md_error_t *ep); 219 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 220 mdcmdopts_t options, md_error_t *ep); 221 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 222 mdcmdopts_t options, md_error_t *ep); 223 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 224 sp_ext_node_t *unitext, md_error_t *ep); 225 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 226 mdcmdopts_t options, md_error_t *ep); 227 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 228 mdcmdopts_t options, md_error_t *ep); 229 230 /* 231 * Private Constants 232 */ 233 234 static const int FORCE_RELOAD_CACHE = 1; 235 static const uint_t NO_FLAGS = 0; 236 static const sp_ext_offset_t NO_OFFSET = 0ULL; 237 static const uint_t NO_SEQUENCE_NUMBER = 0; 238 static const int ONE_SOFT_PARTITION = 1; 239 240 static unsigned long *sp_parent_printed[MD_MAXSETS]; 241 242 #define TEST_SOFT_PARTITION_NAMEP NULL 243 #define TEST_SETNAMEP NULL 244 245 #define EXCLUDE_WM (1) 246 #define INCLUDE_WM (0) 247 248 #define SP_UNALIGNED (0LL) 249 250 /* 251 * ************************************************************************** 252 * Debugging Functions * 253 * ************************************************************************** 254 */ 255 256 /*PRINTFLIKE1*/ 257 static void 258 meta_sp_debug(char *format, ...) 259 { 260 static int debug; 261 static int debug_set = 0; 262 va_list ap; 263 264 if (!debug_set) { 265 debug = getenv(META_SP_DEBUG) ? 1 : 0; 266 debug_set = 1; 267 } 268 269 if (debug) { 270 va_start(ap, format); 271 (void) vfprintf(stderr, format, ap); 272 va_end(ap); 273 } 274 } 275 276 static void 277 meta_sp_printunit(mp_unit_t *mp) 278 { 279 int i; 280 281 if (mp == NULL) 282 return; 283 284 /* print the common fields we know about */ 285 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 286 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 287 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 288 289 /* sp-specific fields */ 290 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 291 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 292 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 293 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 294 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 295 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 296 297 /* print extent information */ 298 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 299 for (i = 0; i < mp->un_numexts; i++) { 300 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 301 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 302 mp->un_ext[i].un_len); 303 } 304 } 305 306 /* 307 * FUNCTION: meta_sp_parsesize() 308 * INPUT: s - the string to parse 309 * OUTPUT: *szp - disk block count (0 for "all") 310 * RETURNS: -1 for error, 0 for success 311 * PURPOSE: parses the command line parameter that specifies the 312 * requested size of a soft partition. The input string 313 * is either the literal "all" or a numeric value 314 * followed by a single character, b for disk blocks, k 315 * for kilobytes, m for megabytes, g for gigabytes, or t 316 * for terabytes. p for petabytes and e for exabytes 317 * have been added as undocumented features for future 318 * expansion. For example, 100m is 100 megabytes, while 319 * 50g is 50 gigabytes. All values are rounded up to the 320 * nearest block size. 321 */ 322 int 323 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 324 { 325 if (s == NULL || szp == NULL) { 326 return (-1); 327 } 328 329 /* Check for literal "all" */ 330 if (strcasecmp(s, "all") == 0) { 331 *szp = 0; 332 return (0); 333 } 334 335 return (meta_sp_parsesizestring(s, szp)); 336 } 337 338 /* 339 * FUNCTION: meta_sp_parsesizestring() 340 * INPUT: s - the string to parse 341 * OUTPUT: *szp - disk block count 342 * RETURNS: -1 for error, 0 for success 343 * PURPOSE: parses a string that specifies size. The input string is a 344 * numeric value followed by a single character, b for disk blocks, 345 * k for kilobytes, m for megabytes, g for gigabytes, or t for 346 * terabytes. p for petabytes and e for exabytes have been added 347 * as undocumented features for future expansion. For example, 348 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 349 * are rounded up to the nearest block size. 350 */ 351 static int 352 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 353 { 354 sp_ext_length_t len = 0; 355 char len_type[2]; 356 357 if (s == NULL || szp == NULL) { 358 return (-1); 359 } 360 361 /* 362 * make sure block offset does not overflow 2^64 bytes. 363 */ 364 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 365 (len == 0LL) || 366 (len > (1LL << (64 - DEV_BSHIFT)))) 367 return (-1); 368 369 switch (len_type[0]) { 370 case 'B': 371 case 'b': 372 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 373 break; 374 case 'K': 375 case 'k': 376 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 377 break; 378 case 'M': 379 case 'm': 380 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 381 break; 382 case 'g': 383 case 'G': 384 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 385 break; 386 case 't': 387 case 'T': 388 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 389 DEV_BSIZE)); 390 break; 391 case 'p': 392 case 'P': 393 len = lbtodb(roundup( 394 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 395 DEV_BSIZE)); 396 break; 397 case 'e': 398 case 'E': 399 len = lbtodb(roundup( 400 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 401 DEV_BSIZE)); 402 break; 403 default: 404 /* error */ 405 return (-1); 406 } 407 408 *szp = len; 409 return (0); 410 } 411 412 /* 413 * FUNCTION: meta_sp_setgeom() 414 * INPUT: np - the underlying device to setup geometry for 415 * compnp - the underlying device to setup geometry for 416 * mp - the unit structure to set the geometry for 417 * OUTPUT: ep - return error pointer 418 * RETURNS: int - -1 if error, 0 otherwise 419 * PURPOSE: establishes geometry information for a device 420 */ 421 static int 422 meta_sp_setgeom( 423 mdname_t *np, 424 mdname_t *compnp, 425 mp_unit_t *mp, 426 md_error_t *ep 427 ) 428 { 429 mdgeom_t *geomp; 430 uint_t round_cyl = 0; 431 432 if ((geomp = metagetgeom(compnp, ep)) == NULL) 433 return (-1); 434 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 435 geomp->read_reinstruct, round_cyl, ep) != 0) 436 return (-1); 437 438 return (0); 439 } 440 441 /* 442 * FUNCTION: meta_sp_setstatus() 443 * INPUT: sp - the set name for the devices to set the status on 444 * minors - an array of minor numbers of devices to set status on 445 * num_units - number of entries in the array 446 * status - status value to set all units to 447 * OUTPUT: ep - return error pointer 448 * RETURNS: int - -1 if error, 0 success 449 * PURPOSE: sets the status of one or more soft partitions to the 450 * requested value 451 */ 452 int 453 meta_sp_setstatus( 454 mdsetname_t *sp, 455 minor_t *minors, 456 int num_units, 457 sp_status_t status, 458 md_error_t *ep 459 ) 460 { 461 md_sp_statusset_t status_params; 462 463 assert(minors != NULL); 464 465 /* update status of all soft partitions to the status passed in */ 466 (void) memset(&status_params, 0, sizeof (status_params)); 467 status_params.num_units = num_units; 468 status_params.new_status = status; 469 status_params.size = num_units * sizeof (minor_t); 470 status_params.minors = (uintptr_t)minors; 471 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 472 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 473 NULL) != 0) { 474 (void) mdstealerror(ep, &status_params.mde); 475 return (-1); 476 } 477 return (0); 478 } 479 480 /* 481 * FUNCTION: meta_get_sp_names() 482 * INPUT: sp - the set name to get soft partitions from 483 * options - options from the command line 484 * OUTPUT: nlpp - list of all soft partition names 485 * ep - return error pointer 486 * RETURNS: int - -1 if error, 0 success 487 * PURPOSE: returns a list of all soft partitions in the metadb 488 * for all devices in the specified set 489 */ 490 int 491 meta_get_sp_names( 492 mdsetname_t *sp, 493 mdnamelist_t **nlpp, 494 int options, 495 md_error_t *ep 496 ) 497 { 498 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 499 } 500 501 /* 502 * FUNCTION: meta_get_by_component() 503 * INPUT: sp - the set name to get soft partitions from 504 * compnp - the name of the device containing the soft 505 * partitions that will be returned 506 * force - 0 - reads cached namelist if available, 507 * 1 - reloads cached namelist, frees old namelist 508 * OUTPUT: nlpp - list of all soft partition names 509 * ep - return error pointer 510 * RETURNS: int - -1 error, otherwise the number of soft partitions 511 * found on the component (0 = none found). 512 * PURPOSE: returns a list of all soft partitions on a given device 513 * from the metadb information 514 */ 515 static int 516 meta_sp_get_by_component( 517 mdsetname_t *sp, 518 mdname_t *compnp, 519 mdnamelist_t **nlpp, 520 int force, 521 md_error_t *ep 522 ) 523 { 524 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 525 static int cached_count = 0; /* cached count */ 526 mdnamelist_t *spnlp = NULL; /* all sp names */ 527 mdnamelist_t *namep; /* list iterator */ 528 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 529 mdnamelist_t **cachetailpp; /* cache tail */ 530 md_sp_t *msp; /* unit structure */ 531 int count = 0; /* count of sp's */ 532 int err; 533 mdname_t *curnp; 534 535 if ((cached_list != NULL) && (!force)) { 536 /* return a copy of the cached list */ 537 for (namep = cached_list; namep != NULL; namep = namep->next) 538 tailpp = meta_namelist_append_wrapper(tailpp, 539 namep->namep); 540 return (cached_count); 541 } 542 543 /* free the cache and reset values to zeros to prepare for a new list */ 544 metafreenamelist(cached_list); 545 cached_count = 0; 546 cached_list = NULL; 547 cachetailpp = &cached_list; 548 *nlpp = NULL; 549 550 /* get all the softpartitions first of all */ 551 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 552 return (-1); 553 554 /* 555 * Now for each sp, see if it resides on the component we 556 * are interested in, if so then add it to our list 557 */ 558 for (namep = spnlp; namep != NULL; namep = namep->next) { 559 curnp = namep->namep; 560 561 /* get the unit structure */ 562 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 563 continue; 564 565 /* 566 * If the current soft partition is not on the same 567 * component, continue the search. If it is on the same 568 * component, add it to our namelist. 569 */ 570 err = meta_check_samedrive(compnp, msp->compnamep, ep); 571 if (err <= 0) { 572 /* not on the same device, check the next one */ 573 continue; 574 } 575 576 /* it's on the same drive */ 577 578 /* 579 * Check for overlapping partitions if the component is not 580 * a metadevice. 581 */ 582 if (!metaismeta(msp->compnamep)) { 583 /* 584 * if they're on the same drive, neither 585 * should be a metadevice if one isn't 586 */ 587 assert(!metaismeta(compnp)); 588 589 if (meta_check_overlap(msp->compnamep->cname, 590 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 591 continue; 592 593 /* in this case it's not an error for them to overlap */ 594 mdclrerror(ep); 595 } 596 597 /* Component is on the same device, add to the used list */ 598 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 599 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 600 curnp); 601 602 ++count; 603 ++cached_count; 604 } 605 606 assert(count == cached_count); 607 return (count); 608 609 out: 610 metafreenamelist(*nlpp); 611 *nlpp = NULL; 612 return (-1); 613 } 614 615 /* 616 * FUNCTION: meta_sp_get_default_alignment() 617 * INPUT: sp - the pertinent set name 618 * compnp - the name of the underlying component 619 * OUTPUT: ep - return error pointer 620 * RETURNS: sp_ext_length_t =0: no default alignment 621 * >0: default alignment 622 * PURPOSE: returns the default alignment for soft partitions to 623 * be built on top of the specified component or 624 * metadevice 625 */ 626 static sp_ext_length_t 627 meta_sp_get_default_alignment( 628 mdsetname_t *sp, 629 mdname_t *compnp, 630 md_error_t *ep 631 ) 632 { 633 sp_ext_length_t a = SP_UNALIGNED; 634 char *mname; 635 636 assert(compnp != NULL); 637 638 /* 639 * We treat raw devices as opaque, and assume nothing about 640 * their alignment requirements. 641 */ 642 if (!metaismeta(compnp)) 643 return (SP_UNALIGNED); 644 645 /* 646 * We already know it's a metadevice from the previous test; 647 * metagetmiscname() will tell us which metadevice type we 648 * have 649 */ 650 mname = metagetmiscname(compnp, ep); 651 if (mname == NULL) 652 goto out; 653 654 /* 655 * For a mirror, we want to deal with the stripe that is the 656 * primary side. If it happens to be asymmetrically 657 * configured, there is no simple way to fake a universal 658 * alignment. There's a chance that the least common 659 * denominator of the set of interlaces from all stripes of 660 * all submirrors would do it, but nobody that really cared 661 * that much about this issue would create an asymmetric 662 * config to start with. 663 * 664 * If the component underlying the soft partition is a mirror, 665 * then at the exit of this loop, compnp will have been 666 * updated to describe the first active submirror. 667 */ 668 if (strcmp(mname, MD_MIRROR) == 0) { 669 md_mirror_t *mp; 670 int smi; 671 md_submirror_t *smp; 672 673 mp = meta_get_mirror(sp, compnp, ep); 674 if (mp == NULL) 675 goto out; 676 677 for (smi = 0; smi < NMIRROR; smi++) { 678 679 smp = &mp->submirrors[smi]; 680 if (smp->state == SMS_UNUSED) 681 continue; 682 683 compnp = smp->submirnamep; 684 assert(compnp != NULL); 685 686 mname = metagetmiscname(compnp, ep); 687 if (mname == NULL) 688 goto out; 689 690 break; 691 } 692 693 if (smi == NMIRROR) 694 goto out; 695 } 696 697 /* 698 * Handle stripes and submirrors identically; just return the 699 * interlace of the first row. 700 */ 701 if (strcmp(mname, MD_STRIPE) == 0) { 702 md_stripe_t *stp; 703 704 stp = meta_get_stripe(sp, compnp, ep); 705 if (stp == NULL) 706 goto out; 707 708 a = stp->rows.rows_val[0].interlace; 709 goto out; 710 } 711 712 /* 713 * Raid is even more straightforward; the interlace applies to 714 * the entire device. 715 */ 716 if (strcmp(mname, MD_RAID) == 0) { 717 md_raid_t *rp; 718 719 rp = meta_get_raid(sp, compnp, ep); 720 if (rp == NULL) 721 goto out; 722 723 a = rp->interlace; 724 goto out; 725 } 726 727 /* 728 * If we have arrived here with the alignment still not set, 729 * then we expect the error to have been set by one of the 730 * routines we called. If neither is the case, something has 731 * really gone wrong above. (Probably the submirror walk 732 * failed to produce a valid submirror, but that would be 733 * really bad...) 734 */ 735 out: 736 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 737 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 738 739 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 740 mde_perror(ep, NULL); 741 } 742 743 assert((a > 0) || (!mdisok(ep))); 744 745 return (a); 746 } 747 748 749 750 /* 751 * FUNCTION: meta_check_insp() 752 * INPUT: sp - the set name for the device to check 753 * np - the name of the device to check 754 * slblk - the starting offset of the device to check 755 * nblks - the number of blocks in the device to check 756 * OUTPUT: ep - return error pointer 757 * RETURNS: int - 0 - device contains soft partitions 758 * -1 - device does not contain soft partitions 759 * PURPOSE: determines whether a device contains any soft partitions 760 */ 761 /* ARGSUSED */ 762 int 763 meta_check_insp( 764 mdsetname_t *sp, 765 mdname_t *np, 766 diskaddr_t slblk, 767 diskaddr_t nblks, 768 md_error_t *ep 769 ) 770 { 771 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 772 int count; 773 int rval; 774 775 /* check set pointer */ 776 assert(sp != NULL); 777 778 /* 779 * Get a list of the soft partitions that currently reside on 780 * the component. We should ALWAYS force reload the cache, 781 * because if we're using the md.tab, we must rebuild 782 * the list because it won't contain the previous (if any) 783 * soft partition. 784 */ 785 /* find all soft partitions on the component */ 786 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 787 788 if (count == -1) { 789 rval = -1; 790 } else if (count > 0) { 791 rval = mduseerror(ep, MDE_ALREADY, np->dev, 792 spnlp->namep->cname, np->cname); 793 } else { 794 rval = 0; 795 } 796 797 metafreenamelist(spnlp); 798 return (rval); 799 } 800 801 /* 802 * ************************************************************************** 803 * Extent List Manipulation Functions * 804 * ************************************************************************** 805 */ 806 807 /* 808 * FUNCTION: meta_sp_cmp_by_nameseq() 809 * INPUT: e1 - first node to compare 810 * e2 - second node to compare 811 * OUTPUT: none 812 * RETURNS: int - =0 - nodes are equal 813 * <0 - e1 should go before e2 814 * >0 - e1 should go after e2 815 * PURPOSE: used for sorted list inserts to build a list sorted by 816 * name first and sequence number second. 817 */ 818 static int 819 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 820 { 821 int rval; 822 823 if (e1->ext_namep == NULL) 824 return (1); 825 if (e2->ext_namep == NULL) 826 return (-1); 827 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 828 return (rval); 829 830 /* the names are equal, compare sequence numbers */ 831 if (e1->ext_seq > e2->ext_seq) 832 return (1); 833 if (e1->ext_seq < e2->ext_seq) 834 return (-1); 835 /* sequence numbers are also equal */ 836 return (0); 837 } 838 839 /* 840 * FUNCTION: meta_sp_cmp_by_offset() 841 * INPUT: e1 - first node to compare 842 * e2 - second node to compare 843 * OUTPUT: none 844 * RETURNS: int - =0 - nodes are equal 845 * <0 - e1 should go before e2 846 * >0 - e1 should go after e2 847 * PURPOSE: used for sorted list inserts to build a list sorted by offset 848 */ 849 static int 850 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 851 { 852 if (e1->ext_offset > e2->ext_offset) 853 return (1); 854 if (e1->ext_offset < e2->ext_offset) 855 return (-1); 856 /* offsets are equal */ 857 return (0); 858 } 859 860 /* 861 * FUNCTION: meta_sp_list_insert() 862 * INPUT: sp - the set name for the device the node belongs to 863 * np - the name of the device the node belongs to 864 * head - the head of the list, must be NULL for empty list 865 * offset - the physical offset of this extent in sectors 866 * length - the length of this extent in sectors 867 * type - the type of the extent being inserted 868 * seq - the sequence number of the extent being inserted 869 * flags - extent flags (eg. whether it needs to be updated) 870 * compare - the compare function to use 871 * OUTPUT: head - points to the new head if a node was inserted 872 * at the beginning 873 * RETURNS: void 874 * PURPOSE: inserts an extent node into a sorted doubly linked list. 875 * The sort order is determined by the compare function. 876 * Memory is allocated for the node in this function and it 877 * is up to the caller to free it, possibly using 878 * meta_sp_list_free(). If a node is inserted at the 879 * beginning of the list, the head pointer is updated to 880 * point to the new first node. 881 */ 882 static void 883 meta_sp_list_insert( 884 mdsetname_t *sp, 885 mdname_t *np, 886 sp_ext_node_t **head, 887 sp_ext_offset_t offset, 888 sp_ext_length_t length, 889 sp_ext_type_t type, 890 uint_t seq, 891 uint_t flags, 892 ext_cmpfunc_t compare 893 ) 894 { 895 sp_ext_node_t *newext; 896 sp_ext_node_t *curext; 897 898 assert(head != NULL); 899 900 /* Don't bother adding zero length nodes */ 901 if (length == 0ULL) 902 return; 903 904 /* allocate and fill in new ext_node */ 905 newext = Zalloc(sizeof (sp_ext_node_t)); 906 907 newext->ext_offset = offset; 908 newext->ext_length = length; 909 newext->ext_flags = flags; 910 newext->ext_type = type; 911 newext->ext_seq = seq; 912 newext->ext_setp = sp; 913 newext->ext_namep = np; 914 915 /* first node in the list */ 916 if (*head == NULL) { 917 newext->ext_next = newext->ext_prev = NULL; 918 *head = newext; 919 } else if ((*compare)(*head, newext) >= 0) { 920 /* the first node has a bigger offset, so insert before it */ 921 assert((*head)->ext_prev == NULL); 922 923 newext->ext_prev = NULL; 924 newext->ext_next = *head; 925 (*head)->ext_prev = newext; 926 *head = newext; 927 } else { 928 /* 929 * find the next node whose offset is greater than 930 * the one we want to insert, or the end of the list. 931 */ 932 for (curext = *head; 933 (curext->ext_next != NULL) && 934 ((*compare)(curext->ext_next, newext) < 0); 935 (curext = curext->ext_next)) 936 ; 937 938 /* link the new node in after the current node */ 939 newext->ext_next = curext->ext_next; 940 newext->ext_prev = curext; 941 942 if (curext->ext_next != NULL) 943 curext->ext_next->ext_prev = newext; 944 945 curext->ext_next = newext; 946 } 947 } 948 949 /* 950 * FUNCTION: meta_sp_list_free() 951 * INPUT: head - the head of the list, must be NULL for empty list 952 * OUTPUT: head - points to NULL on return 953 * RETURNS: void 954 * PURPOSE: walks a double linked extent list and frees each node 955 */ 956 static void 957 meta_sp_list_free(sp_ext_node_t **head) 958 { 959 sp_ext_node_t *ext; 960 sp_ext_node_t *next; 961 962 assert(head != NULL); 963 964 ext = *head; 965 while (ext) { 966 next = ext->ext_next; 967 Free(ext); 968 ext = next; 969 } 970 *head = NULL; 971 } 972 973 /* 974 * FUNCTION: meta_sp_list_remove() 975 * INPUT: head - the head of the list, must be NULL for empty list 976 * ext - the extent to remove, must be a member of the list 977 * OUTPUT: head - points to the new head of the list 978 * RETURNS: void 979 * PURPOSE: unlinks the node specified by ext from the list and 980 * frees it, possibly moving the head pointer forward if 981 * the head is the node being removed. 982 */ 983 static void 984 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 985 { 986 assert(head != NULL); 987 assert(*head != NULL); 988 989 if (*head == ext) 990 *head = ext->ext_next; 991 992 if (ext->ext_prev != NULL) 993 ext->ext_prev->ext_next = ext->ext_next; 994 if (ext->ext_next != NULL) 995 ext->ext_next->ext_prev = ext->ext_prev; 996 Free(ext); 997 } 998 999 /* 1000 * FUNCTION: meta_sp_list_size() 1001 * INPUT: head - the head of the list, must be NULL for empty list 1002 * exttype - the type of the extents to sum 1003 * exclude_wm - subtract space for extent headers from total 1004 * OUTPUT: none 1005 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1006 * PURPOSE: sums the lengths of all extents in the list matching the 1007 * specified type. This could be used for computing the 1008 * amount of free or used space, for example. 1009 */ 1010 static sp_ext_length_t 1011 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1012 { 1013 sp_ext_node_t *ext; 1014 sp_ext_length_t size = 0LL; 1015 1016 for (ext = head; ext != NULL; ext = ext->ext_next) 1017 if (ext->ext_type == exttype) 1018 size += ext->ext_length - 1019 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1020 1021 return (size); 1022 } 1023 1024 /* 1025 * FUNCTION: meta_sp_list_find() 1026 * INPUT: head - the head of the list, must be NULL for empty list 1027 * offset - the offset contained by the node to find 1028 * OUTPUT: none 1029 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1030 * or NULL if no such nodes were found. 1031 * PURPOSE: finds a node in a list containing the requested offset 1032 * (inclusive). If multiple nodes contain this offset then 1033 * only the first will be returned, though typically these 1034 * lists are managed with non-overlapping nodes. 1035 * 1036 * *The list MUST be sorted by offset for this function to work.* 1037 */ 1038 static sp_ext_node_t * 1039 meta_sp_list_find( 1040 sp_ext_node_t *head, 1041 sp_ext_offset_t offset 1042 ) 1043 { 1044 sp_ext_node_t *ext; 1045 1046 for (ext = head; ext != NULL; ext = ext->ext_next) { 1047 /* check if the offset lies within this extent */ 1048 if ((offset >= ext->ext_offset) && 1049 (offset < ext->ext_offset + ext->ext_length)) { 1050 /* 1051 * the requested extent should always be a 1052 * subset of an extent in the list. 1053 */ 1054 return (ext); 1055 } 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * FUNCTION: meta_sp_list_freefill() 1062 * INPUT: head - the head of the list, must be NULL for empty list 1063 * size - the size of the volume this extent list is 1064 * representing 1065 * OUTPUT: head - the new head of the list 1066 * RETURNS: void 1067 * PURPOSE: finds gaps in the extent list and fills them with a free 1068 * node. If there is a gap at the beginning the head 1069 * pointer will be changed to point to the new free node. 1070 * If there is free space at the end, the last free extent 1071 * will extend all the way out to the size specified. 1072 * 1073 * *The list MUST be sorted by offset for this function to work.* 1074 */ 1075 static void 1076 meta_sp_list_freefill( 1077 sp_ext_node_t **head, 1078 sp_ext_length_t size 1079 ) 1080 { 1081 sp_ext_node_t *ext; 1082 sp_ext_offset_t curoff = 0LL; 1083 1084 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1085 if (curoff < ext->ext_offset) 1086 meta_sp_list_insert(NULL, NULL, head, 1087 curoff, ext->ext_offset - curoff, 1088 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1089 curoff = ext->ext_offset + ext->ext_length; 1090 } 1091 1092 /* pad inverse list out to the end */ 1093 if (curoff < size) 1094 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1095 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1096 1097 if (getenv(META_SP_DEBUG)) { 1098 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1099 "holes freefilled:\n"); 1100 meta_sp_list_dump(*head); 1101 } 1102 } 1103 1104 /* 1105 * FUNCTION: meta_sp_list_dump() 1106 * INPUT: head - the head of the list, must be NULL for empty list 1107 * OUTPUT: none 1108 * RETURNS: void 1109 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1110 */ 1111 static void 1112 meta_sp_list_dump(sp_ext_node_t *head) 1113 { 1114 sp_ext_node_t *ext; 1115 1116 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1117 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1118 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1119 "Next"); 1120 for (ext = head; ext != NULL; ext = ext->ext_next) { 1121 if (ext->ext_namep != NULL) 1122 meta_sp_debug("%5s", ext->ext_namep->cname); 1123 else 1124 meta_sp_debug("%5s", "NONE"); 1125 1126 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1127 switch (ext->ext_type) { 1128 case EXTTYP_ALLOC: 1129 meta_sp_debug("%7s ", "ALLOC"); 1130 break; 1131 case EXTTYP_FREE: 1132 meta_sp_debug("%7s ", "FREE"); 1133 break; 1134 case EXTTYP_END: 1135 meta_sp_debug("%7s ", "END"); 1136 break; 1137 case EXTTYP_RESERVED: 1138 meta_sp_debug("%7s ", "RESV"); 1139 break; 1140 default: 1141 meta_sp_debug("%7s ", "INVLD"); 1142 break; 1143 } 1144 1145 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1146 ext->ext_offset, ext->ext_length, 1147 ext->ext_flags, (void *) ext->ext_prev, 1148 (void *) ext->ext_next); 1149 } 1150 meta_sp_debug("\n"); 1151 } 1152 1153 /* 1154 * FUNCTION: meta_sp_list_overlaps() 1155 * INPUT: head - the head of the list, must be NULL for empty list 1156 * OUTPUT: none 1157 * RETURNS: int - 1 if extents overlap, 0 if ok 1158 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1159 * offset for this function to work properly. 1160 */ 1161 static int 1162 meta_sp_list_overlaps(sp_ext_node_t *head) 1163 { 1164 sp_ext_node_t *ext; 1165 1166 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1167 if (ext->ext_offset + ext->ext_length > 1168 ext->ext_next->ext_offset) 1169 return (1); 1170 } 1171 return (0); 1172 } 1173 1174 /* 1175 * ************************************************************************** 1176 * Extent Allocation Functions * 1177 * ************************************************************************** 1178 */ 1179 1180 /* 1181 * FUNCTION: meta_sp_alloc_by_ext() 1182 * INPUT: sp - the set name for the device the node belongs to 1183 * np - the name of the device the node belongs to 1184 * head - the head of the list, must be NULL for empty list 1185 * free_ext - the free extent being allocated from 1186 * alloc_offset - the offset of the allocation 1187 * alloc_len - the length of the allocation 1188 * seq - the sequence number of the allocation 1189 * OUTPUT: head - the new head pointer 1190 * RETURNS: void 1191 * PURPOSE: allocates a portion of the free extent free_ext. The 1192 * allocated portion starts at alloc_offset and is 1193 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1194 * alloc_length) must be contained within the free extent. 1195 * 1196 * The free extent is split into as many as 3 pieces - a 1197 * free extent containing [ free_offset .. alloc_offset ), an 1198 * allocated extent containing the range [ alloc_offset .. 1199 * alloc_end ], and another free extent containing the 1200 * range ( alloc_end .. free_end ]. If either of the two 1201 * new free extents would be zero length, they are not created. 1202 * 1203 * Finally, the original free extent is removed. All newly 1204 * created extents have the EXTFLG_UPDATE flag set. 1205 */ 1206 static void 1207 meta_sp_alloc_by_ext( 1208 mdsetname_t *sp, 1209 mdname_t *np, 1210 sp_ext_node_t **head, 1211 sp_ext_node_t *free_ext, 1212 sp_ext_offset_t alloc_offset, 1213 sp_ext_length_t alloc_length, 1214 uint_t seq 1215 ) 1216 { 1217 sp_ext_offset_t free_offset = free_ext->ext_offset; 1218 sp_ext_length_t free_length = free_ext->ext_length; 1219 1220 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1221 sp_ext_offset_t free_end = free_offset + free_length; 1222 1223 /* allocated extent must be a subset of the free extent */ 1224 assert(free_offset <= alloc_offset); 1225 assert(free_end >= alloc_end); 1226 1227 meta_sp_list_remove(head, free_ext); 1228 1229 if (free_offset < alloc_offset) { 1230 meta_sp_list_insert(NULL, NULL, head, free_offset, 1231 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1232 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1233 } 1234 1235 if (free_end > alloc_end) { 1236 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1237 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1238 meta_sp_cmp_by_offset); 1239 } 1240 1241 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1242 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1243 1244 if (getenv(META_SP_DEBUG)) { 1245 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1246 meta_sp_list_dump(*head); 1247 } 1248 } 1249 1250 /* 1251 * FUNCTION: meta_sp_alloc_by_len() 1252 * INPUT: sp - the set name for the device the node belongs to 1253 * np - the name of the device the node belongs to 1254 * head - the head of the list, must be NULL for empty list 1255 * *lp - the requested length to allocate 1256 * last_off - the last offset already allocated. 1257 * alignment - the desired extent alignmeent 1258 * OUTPUT: head - the new head pointer 1259 * *lp - the length allocated 1260 * RETURNS: int - -1 if error, the number of new extents on success 1261 * PURPOSE: allocates extents from free space to satisfy the requested 1262 * length. If requested length is zero, allocates all 1263 * remaining free space. This function provides the meat 1264 * of the extent allocation algorithm. Allocation is a 1265 * three tier process: 1266 * 1267 * 1. If last_off is nonzero and there is free space following 1268 * that node, then it is extended to allocate as much of that 1269 * free space as possible. This is useful for metattach. 1270 * 2. If a free extent can be found to satisfy the remaining 1271 * requested space, then satisfy the rest of the request 1272 * from that extent. 1273 * 3. Start allocating space from any remaining free extents until 1274 * the remainder of the request is satisified. 1275 * 1276 * If alignment is non-zero, then every extent modified 1277 * or newly allocated will be aligned modulo alignment, 1278 * with a length that is an integer multiple of 1279 * alignment. 1280 * 1281 * The EXTFLG_UPDATE flag is set for all nodes (free and 1282 * allocated) that require updated watermarks. 1283 * 1284 * This algorithm may have a negative impact on fragmentation 1285 * in pathological cases and may be improved if it turns out 1286 * to be a problem. This may be exacerbated by particularly 1287 * large alignments. 1288 * 1289 * NOTE: It's confusing, so it demands an explanation: 1290 * - len is used to represent requested data space; it 1291 * does not include room for a watermark. On each full 1292 * or partial allocation, len will be decremented by 1293 * alloc_len (see next paragraph) until it reaches 1294 * zero. 1295 * - alloc_len is used to represent data space allocated 1296 * from a particular extent; it does not include space 1297 * for a watermark. In the rare event that a_length 1298 * (see next paragraph) is equal to MD_SP_WMSIZE, 1299 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1300 * fragment of space will be utterly unusable. 1301 * - a_length is used to represent all space to be 1302 * allocated from a particular extent; it DOES include 1303 * space for a watermark. 1304 */ 1305 static int 1306 meta_sp_alloc_by_len( 1307 mdsetname_t *sp, 1308 mdname_t *np, 1309 sp_ext_node_t **head, 1310 sp_ext_length_t *lp, 1311 sp_ext_offset_t last_off, 1312 sp_ext_offset_t alignment 1313 ) 1314 { 1315 sp_ext_node_t *free_ext; 1316 sp_ext_node_t *alloc_ext; 1317 uint_t last_seq = 0; 1318 uint_t numexts = 0; 1319 sp_ext_length_t freespace; 1320 sp_ext_length_t alloc_len; 1321 sp_ext_length_t len; 1322 1323 /* We're DOA if we can't read *lp */ 1324 assert(lp != NULL); 1325 len = *lp; 1326 1327 /* 1328 * Process the nominal case first: we've been given an actual 1329 * size argument, rather than the literal "all" 1330 */ 1331 1332 if (len != 0) { 1333 1334 /* 1335 * Short circuit the check for free space. This may 1336 * tell us we have enough space when we really don't 1337 * because each extent loses space to a watermark, but 1338 * it will always tell us there isn't enough space 1339 * correctly. Worst case we do some extra work. 1340 */ 1341 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1342 INCLUDE_WM); 1343 1344 if (freespace < len) 1345 return (-1); 1346 1347 /* 1348 * First see if we can extend the last extent for an 1349 * attach. 1350 */ 1351 if (last_off != 0LL) { 1352 int align = 0; 1353 1354 alloc_ext = 1355 meta_sp_list_find(*head, last_off); 1356 assert(alloc_ext != NULL); 1357 1358 /* 1359 * The offset test reflects the 1360 * inclusion of the watermark in the extent 1361 */ 1362 align = (alignment > 0) && 1363 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1364 alignment) == 0); 1365 1366 /* 1367 * If we decided not to align here, we should 1368 * also reset "alignment" so we don't bother 1369 * later, either. 1370 */ 1371 if (!align) { 1372 alignment = 0; 1373 } 1374 1375 last_seq = alloc_ext->ext_seq; 1376 1377 free_ext = meta_sp_list_find(*head, 1378 alloc_ext->ext_offset + 1379 alloc_ext->ext_length); 1380 1381 /* 1382 * If a free extent follows our last allocated 1383 * extent, then remove the last allocated 1384 * extent and increase the size of the free 1385 * extent to overlap it, then allocate the 1386 * total space from the new free extent. 1387 */ 1388 if (free_ext != NULL && 1389 free_ext->ext_type == EXTTYP_FREE) { 1390 assert(free_ext->ext_offset == 1391 alloc_ext->ext_offset + 1392 alloc_ext->ext_length); 1393 1394 alloc_len = 1395 MIN(len, free_ext->ext_length); 1396 1397 if (align && (alloc_len < len)) { 1398 /* No watermark space needed */ 1399 alloc_len -= alloc_len % alignment; 1400 } 1401 1402 if (alloc_len > 0) { 1403 free_ext->ext_offset -= 1404 alloc_ext->ext_length; 1405 free_ext->ext_length += 1406 alloc_ext->ext_length; 1407 1408 meta_sp_alloc_by_ext(sp, np, head, 1409 free_ext, free_ext->ext_offset, 1410 alloc_ext->ext_length + alloc_len, 1411 last_seq); 1412 1413 /* 1414 * now remove the original allocated 1415 * node. We may have overlapping 1416 * extents for a short time before 1417 * this node is removed. 1418 */ 1419 meta_sp_list_remove(head, alloc_ext); 1420 len -= alloc_len; 1421 } 1422 } 1423 last_seq++; 1424 } 1425 1426 if (len == 0LL) 1427 goto out; 1428 1429 /* 1430 * Next, see if we can find a single allocation for 1431 * the remainder. This may make fragmentation worse 1432 * in some cases, but there's no good way to allocate 1433 * that doesn't have a highly fragmented corner case. 1434 */ 1435 for (free_ext = *head; free_ext != NULL; 1436 free_ext = free_ext->ext_next) { 1437 sp_ext_offset_t a_offset; 1438 sp_ext_offset_t a_length; 1439 1440 if (free_ext->ext_type != EXTTYP_FREE) 1441 continue; 1442 1443 /* 1444 * The length test should include space for 1445 * the watermark 1446 */ 1447 1448 a_offset = free_ext->ext_offset; 1449 a_length = free_ext->ext_length; 1450 1451 if (alignment > 0) { 1452 1453 /* 1454 * Shortcut for extents that have been 1455 * previously added to pad out the 1456 * data space 1457 */ 1458 if (a_length < alignment) { 1459 continue; 1460 } 1461 1462 /* 1463 * Round up so the data space begins 1464 * on a properly aligned boundary. 1465 */ 1466 a_offset += alignment - 1467 (a_offset % alignment) - MD_SP_WMSIZE; 1468 1469 /* 1470 * This is only necessary in case the 1471 * watermark size is ever greater than 1472 * one. It'll never happen, of 1473 * course; we'll get rid of watermarks 1474 * before we make 'em bigger. 1475 */ 1476 if (a_offset < free_ext->ext_offset) { 1477 a_offset += alignment; 1478 } 1479 1480 /* 1481 * Adjust the length to account for 1482 * the space lost above (if any) 1483 */ 1484 a_length -= 1485 (a_offset - free_ext->ext_offset); 1486 } 1487 1488 if (a_length >= len + MD_SP_WMSIZE) { 1489 meta_sp_alloc_by_ext(sp, np, head, 1490 free_ext, a_offset, 1491 len + MD_SP_WMSIZE, last_seq); 1492 1493 len = 0LL; 1494 numexts++; 1495 break; 1496 } 1497 } 1498 1499 if (len == 0LL) 1500 goto out; 1501 1502 1503 /* 1504 * If the request could not be satisfied by extending 1505 * the last extent or by a single extent, then put 1506 * multiple smaller extents together until the request 1507 * is satisfied. 1508 */ 1509 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1510 free_ext = free_ext->ext_next) { 1511 sp_ext_offset_t a_offset; 1512 sp_ext_length_t a_length; 1513 1514 if (free_ext->ext_type != EXTTYP_FREE) 1515 continue; 1516 1517 a_offset = free_ext->ext_offset; 1518 a_length = free_ext->ext_length; 1519 1520 if (alignment > 0) { 1521 1522 /* 1523 * Shortcut for extents that have been 1524 * previously added to pad out the 1525 * data space 1526 */ 1527 if (a_length < alignment) { 1528 continue; 1529 } 1530 1531 /* 1532 * Round up so the data space begins 1533 * on a properly aligned boundary. 1534 */ 1535 a_offset += alignment - 1536 (a_offset % alignment) - MD_SP_WMSIZE; 1537 1538 /* 1539 * This is only necessary in case the 1540 * watermark size is ever greater than 1541 * one. It'll never happen, of 1542 * course; we'll get rid of watermarks 1543 * before we make 'em bigger. 1544 */ 1545 if (a_offset < free_ext->ext_offset) { 1546 a_offset += alignment; 1547 } 1548 1549 /* 1550 * Adjust the length to account for 1551 * the space lost above (if any) 1552 */ 1553 a_length -= 1554 (a_offset - free_ext->ext_offset); 1555 1556 /* 1557 * Adjust the length to be properly 1558 * aligned if it is NOT to be the 1559 * last extent in the soft partition. 1560 */ 1561 if ((a_length - MD_SP_WMSIZE) < len) 1562 a_length -= 1563 (a_length - MD_SP_WMSIZE) 1564 % alignment; 1565 } 1566 1567 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1568 if (alloc_len == 0) 1569 continue; 1570 1571 /* 1572 * meta_sp_alloc_by_ext() expects the 1573 * allocation length to include the watermark 1574 * size, which is why we don't simply pass in 1575 * alloc_len here. 1576 */ 1577 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1578 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1579 last_seq); 1580 1581 len -= alloc_len; 1582 numexts++; 1583 last_seq++; 1584 } 1585 1586 1587 /* 1588 * If there was not enough space we can throw it all 1589 * away since no real work has been done yet. 1590 */ 1591 if (len != 0) { 1592 meta_sp_list_free(head); 1593 return (-1); 1594 } 1595 } 1596 1597 /* 1598 * Otherwise, the literal "all" was specified: allocate all 1599 * available free space. Don't bother with alignment. 1600 */ 1601 else { 1602 /* First, extend the last extent if this is a grow */ 1603 if (last_off != 0LL) { 1604 alloc_ext = 1605 meta_sp_list_find(*head, last_off); 1606 assert(alloc_ext != NULL); 1607 1608 last_seq = alloc_ext->ext_seq; 1609 1610 free_ext = meta_sp_list_find(*head, 1611 alloc_ext->ext_offset + 1612 alloc_ext->ext_length); 1613 1614 /* 1615 * If a free extent follows our last allocated 1616 * extent, then remove the last allocated 1617 * extent and increase the size of the free 1618 * extent to overlap it, then allocate the 1619 * total space from the new free extent. 1620 */ 1621 if (free_ext != NULL && 1622 free_ext->ext_type == EXTTYP_FREE) { 1623 assert(free_ext->ext_offset == 1624 alloc_ext->ext_offset + 1625 alloc_ext->ext_length); 1626 1627 len = alloc_len = 1628 free_ext->ext_length; 1629 1630 free_ext->ext_offset -= 1631 alloc_ext->ext_length; 1632 free_ext->ext_length += 1633 alloc_ext->ext_length; 1634 1635 meta_sp_alloc_by_ext(sp, np, head, 1636 free_ext, free_ext->ext_offset, 1637 alloc_ext->ext_length + alloc_len, 1638 last_seq); 1639 1640 /* 1641 * now remove the original allocated 1642 * node. We may have overlapping 1643 * extents for a short time before 1644 * this node is removed. 1645 */ 1646 meta_sp_list_remove(head, alloc_ext); 1647 } 1648 1649 last_seq++; 1650 } 1651 1652 /* Next, grab all remaining free space */ 1653 for (free_ext = *head; free_ext != NULL; 1654 free_ext = free_ext->ext_next) { 1655 1656 if (free_ext->ext_type == EXTTYP_FREE) { 1657 alloc_len = 1658 free_ext->ext_length - MD_SP_WMSIZE; 1659 if (alloc_len == 0) 1660 continue; 1661 1662 /* 1663 * meta_sp_alloc_by_ext() expects the 1664 * allocation length to include the 1665 * watermark size, which is why we 1666 * don't simply pass in alloc_len 1667 * here. 1668 */ 1669 meta_sp_alloc_by_ext(sp, np, head, 1670 free_ext, free_ext->ext_offset, 1671 free_ext->ext_length, 1672 last_seq); 1673 1674 len += alloc_len; 1675 numexts++; 1676 last_seq++; 1677 } 1678 } 1679 } 1680 1681 out: 1682 if (getenv(META_SP_DEBUG)) { 1683 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1684 "allocation:\n"); 1685 meta_sp_list_dump(*head); 1686 } 1687 1688 if (*lp == 0) { 1689 *lp = len; 1690 1691 /* 1692 * Make sure the callers hit a no space error if we 1693 * didn't actually find anything. 1694 */ 1695 if (len == 0) { 1696 return (-1); 1697 } 1698 } 1699 1700 return (numexts); 1701 } 1702 1703 /* 1704 * FUNCTION: meta_sp_alloc_by_list() 1705 * INPUT: sp - the set name for the device the node belongs to 1706 * np - the name of the device the node belongs to 1707 * head - the head of the list, must be NULL for empty list 1708 * oblist - an extent list containing requested nodes to allocate 1709 * OUTPUT: head - the new head pointer 1710 * RETURNS: int - -1 if error, the number of new extents on success 1711 * PURPOSE: allocates extents from free space to satisfy the requested 1712 * extent list. This is primarily used for the -o/-b options 1713 * where the user may specifically request extents to allocate. 1714 * Each extent in the oblist must be a subset (inclusive) of a 1715 * free extent and may not overlap each other. This 1716 * function sets the EXTFLG_UPDATE flag for each node that 1717 * requires a watermark update after allocating. 1718 */ 1719 static int 1720 meta_sp_alloc_by_list( 1721 mdsetname_t *sp, 1722 mdname_t *np, 1723 sp_ext_node_t **head, 1724 sp_ext_node_t *oblist 1725 ) 1726 { 1727 sp_ext_node_t *ext; 1728 sp_ext_node_t *free_ext; 1729 uint_t numexts = 0; 1730 1731 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1732 1733 free_ext = meta_sp_list_find(*head, 1734 ext->ext_offset - MD_SP_WMSIZE); 1735 1736 /* Make sure the allocation is within the free extent */ 1737 if ((free_ext == NULL) || 1738 (ext->ext_offset + ext->ext_length > 1739 free_ext->ext_offset + free_ext->ext_length) || 1740 (free_ext->ext_type != EXTTYP_FREE)) 1741 return (-1); 1742 1743 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1744 ext->ext_offset - MD_SP_WMSIZE, 1745 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1746 1747 numexts++; 1748 } 1749 1750 assert(meta_sp_list_overlaps(*head) == 0); 1751 1752 if (getenv(META_SP_DEBUG)) { 1753 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1754 "allocation:\n"); 1755 meta_sp_list_dump(*head); 1756 } 1757 1758 return (numexts); 1759 } 1760 1761 /* 1762 * ************************************************************************** 1763 * Extent List Population Functions * 1764 * ************************************************************************** 1765 */ 1766 1767 /* 1768 * FUNCTION: meta_sp_extlist_from_namelist() 1769 * INPUT: sp - the set name for the device the node belongs to 1770 * spnplp - the namelist of soft partitions to build a list from 1771 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1772 * ep - return error pointer 1773 * RETURNS: int - -1 if error, 0 on success 1774 * PURPOSE: builds an extent list representing the soft partitions 1775 * specified in the namelist. Each extent in each soft 1776 * partition is added to the list with the type EXTTYP_ALLOC. 1777 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1778 * extent in the list includes the space occupied by the 1779 * watermark, which is not included in the unit structures. 1780 */ 1781 static int 1782 meta_sp_extlist_from_namelist( 1783 mdsetname_t *sp, 1784 mdnamelist_t *spnlp, 1785 sp_ext_node_t **extlist, 1786 md_error_t *ep 1787 ) 1788 { 1789 int extn; 1790 md_sp_t *msp; /* unit structure of the sp's */ 1791 mdnamelist_t *namep; 1792 1793 assert(sp != NULL); 1794 1795 /* 1796 * Now go through the soft partitions and add a node to the used 1797 * list for each allocated extent. 1798 */ 1799 for (namep = spnlp; namep != NULL; namep = namep->next) { 1800 mdname_t *curnp = namep->namep; 1801 1802 /* get the unit structure */ 1803 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1804 return (-1); 1805 1806 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1807 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1808 1809 /* 1810 * subtract from offset and add to the length 1811 * to account for the watermark, which is not 1812 * contained in the extents in the unit structure. 1813 */ 1814 meta_sp_list_insert(sp, curnp, extlist, 1815 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1816 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1817 } 1818 } 1819 return (0); 1820 } 1821 1822 /* 1823 * FUNCTION: meta_sp_extlist_from_wm() 1824 * INPUT: sp - the set name for the device the node belongs to 1825 * compnp - the name of the device to scan watermarks on 1826 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1827 * ep - return error pointer 1828 * RETURNS: int - -1 if error, 0 on success 1829 * PURPOSE: builds an extent list representing the soft partitions 1830 * specified in the namelist. Each extent in each soft 1831 * partition is added to the list with the type EXTTYP_ALLOC. 1832 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1833 * extent in the list includes the space occupied by the 1834 * watermark, which is not included in the unit structures. 1835 */ 1836 static int 1837 meta_sp_extlist_from_wm( 1838 mdsetname_t *sp, 1839 mdname_t *compnp, 1840 sp_ext_node_t **extlist, 1841 ext_cmpfunc_t compare, 1842 md_error_t *ep 1843 ) 1844 { 1845 mp_watermark_t wm; 1846 mdname_t *np = NULL; 1847 mdsetname_t *spsetp = NULL; 1848 sp_ext_offset_t cur_off; 1849 md_set_desc *sd; 1850 int init = 0; 1851 mdkey_t key; 1852 minor_t mnum; 1853 1854 if (!metaislocalset(sp)) { 1855 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1856 return (-1); 1857 } 1858 1859 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1860 return (-1); 1861 1862 for (;;) { 1863 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1864 return (-1); 1865 } 1866 1867 /* get the set and name pointers */ 1868 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1869 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1870 return (-1); 1871 } 1872 } 1873 1874 /* 1875 * For the MN set, meta_init_make_device needs to 1876 * be run on all the nodes so the entries for the 1877 * softpart device name and its comp can be created 1878 * in the same order in the replica namespace. If 1879 * we have it run on mdmn_do_iocset then the mddbs 1880 * will be out of sync between master node and slave 1881 * nodes. 1882 */ 1883 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1884 1885 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1886 md_mn_msg_addmdname_t *send_params; 1887 int result; 1888 md_mn_result_t *resp = NULL; 1889 int message_size; 1890 1891 message_size = sizeof (*send_params) + 1892 strlen(wm.wm_mdname) + 1; 1893 send_params = Zalloc(message_size); 1894 send_params->addmdname_setno = sp->setno; 1895 (void) strcpy(&send_params->addmdname_name[0], 1896 wm.wm_mdname); 1897 result = mdmn_send_message(sp->setno, 1898 MD_MN_MSG_ADDMDNAME, 1899 MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, 1900 (char *)send_params, message_size, &resp, 1901 ep); 1902 Free(send_params); 1903 if (resp != NULL) { 1904 if (resp->mmr_exitval != 0) { 1905 free_result(resp); 1906 return (-1); 1907 } 1908 free_result(resp); 1909 } 1910 if (result != 0) 1911 return (-1); 1912 } else { 1913 1914 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1915 if ((key = meta_init_make_device(&sp, 1916 wm.wm_mdname, ep)) <= 0) { 1917 return (-1); 1918 } 1919 init = 1; 1920 } 1921 } 1922 1923 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1924 if (np == NULL) { 1925 if (init) { 1926 if (meta_getnmentbykey(sp->setno, 1927 MD_SIDEWILD, key, NULL, &mnum, 1928 NULL, ep) != NULL) { 1929 (void) metaioctl(MD_IOCREM_DEV, 1930 &mnum, ep, NULL); 1931 } 1932 (void) del_self_name(sp, key, ep); 1933 } 1934 return (-1); 1935 } 1936 } 1937 1938 /* insert watermark into extent list */ 1939 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1940 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1941 EXTFLG_UPDATE, compare); 1942 1943 /* if we see the end watermark, we're done */ 1944 if (wm.wm_type == EXTTYP_END) 1945 break; 1946 1947 cur_off += wm.wm_length + 1; 1948 1949 /* clear out set and name pointers for next iteration */ 1950 np = NULL; 1951 spsetp = NULL; 1952 } 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * ************************************************************************** 1959 * Print (metastat) Functions * 1960 * ************************************************************************** 1961 */ 1962 1963 /* 1964 * FUNCTION: meta_sp_short_print() 1965 * INPUT: msp - the unit structure to display 1966 * fp - the file pointer to send output to 1967 * options - print options from the command line processor 1968 * OUTPUT: ep - return error pointer 1969 * RETURNS: int - -1 if error, 0 on success 1970 * PURPOSE: display a short report of the soft partition in md.tab 1971 * form, primarily used for metastat -p. 1972 */ 1973 static int 1974 meta_sp_short_print( 1975 md_sp_t *msp, 1976 char *fname, 1977 FILE *fp, 1978 mdprtopts_t options, 1979 md_error_t *ep 1980 ) 1981 { 1982 int extn; 1983 1984 if (options & PRINT_LARGEDEVICES) { 1985 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1986 return (0); 1987 } 1988 1989 if (options & PRINT_FN) { 1990 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1991 return (0); 1992 } 1993 1994 /* print name and -p */ 1995 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1996 return (mdsyserror(ep, errno, fname)); 1997 1998 /* print the component */ 1999 /* 2000 * Always print the full path name 2001 */ 2002 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2003 return (mdsyserror(ep, errno, fname)); 2004 2005 /* print out each extent */ 2006 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2007 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2008 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2009 extp->len) == EOF) 2010 return (mdsyserror(ep, errno, fname)); 2011 } 2012 2013 if (fprintf(fp, "\n") == EOF) 2014 return (mdsyserror(ep, errno, fname)); 2015 2016 /* success */ 2017 return (0); 2018 } 2019 2020 /* 2021 * FUNCTION: meta_sp_status_to_name() 2022 * INPUT: xsp_status - the status value to convert to a string 2023 * tstate - transient errored device state. If set the 2024 * device is Unavailable 2025 * OUTPUT: none 2026 * RETURNS: char * - a pointer to the string representing the status value 2027 * PURPOSE: return an internationalized string representing the 2028 * status value for a soft partition. The strings are 2029 * strdup'd and must be freed by the caller. 2030 */ 2031 static char * 2032 meta_sp_status_to_name( 2033 xsp_status_t xsp_status, 2034 uint_t tstate 2035 ) 2036 { 2037 char *rval = NULL; 2038 2039 /* 2040 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 2041 * value for an 'Unavailable' return. tstate can be set because of 2042 * other multi-node reasons (e.g. ABR being set) 2043 */ 2044 if (tstate & MD_INACCESSIBLE) { 2045 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 2046 } 2047 2048 switch (xsp_status) { 2049 case MD_SP_CREATEPEND: 2050 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 2051 break; 2052 case MD_SP_GROWPEND: 2053 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 2054 break; 2055 case MD_SP_DELPEND: 2056 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 2057 break; 2058 case MD_SP_OK: 2059 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 2060 break; 2061 case MD_SP_ERR: 2062 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2063 break; 2064 case MD_SP_RECOVER: 2065 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2066 break; 2067 } 2068 2069 if (rval == NULL) 2070 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2071 2072 return (rval); 2073 } 2074 2075 /* 2076 * FUNCTION: meta_sp_report() 2077 * INPUT: sp - the set name for the unit being displayed 2078 * msp - the unit structure to display 2079 * nlpp - pass back the large devs 2080 * fp - the file pointer to send output to 2081 * options - print options from the command line processor 2082 * OUTPUT: ep - return error pointer 2083 * RETURNS: int - -1 if error, 0 on success 2084 * PURPOSE: print a full report of the device specified 2085 */ 2086 static int 2087 meta_sp_report( 2088 mdsetname_t *sp, 2089 md_sp_t *msp, 2090 mdnamelist_t **nlpp, 2091 char *fname, 2092 FILE *fp, 2093 mdprtopts_t options, 2094 md_error_t *ep 2095 ) 2096 { 2097 uint_t extn; 2098 char *status; 2099 char *devid = ""; 2100 mdname_t *didnp = NULL; 2101 ddi_devid_t dtp; 2102 int len; 2103 uint_t tstate = 0; 2104 2105 if (options & PRINT_LARGEDEVICES) { 2106 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) { 2107 return (0); 2108 } else { 2109 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2110 return (-1); 2111 } 2112 } 2113 2114 if (options & PRINT_FN) { 2115 if ((msp->common.revision & MD_FN_META_DEV) == 0) { 2116 return (0); 2117 } else { 2118 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2119 return (-1); 2120 } 2121 } 2122 2123 if (options & PRINT_HEADER) { 2124 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2125 msp->common.namep->cname) == EOF) 2126 return (mdsyserror(ep, errno, fname)); 2127 } 2128 2129 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2130 msp->compnamep->cname) == EOF) 2131 return (mdsyserror(ep, errno, fname)); 2132 2133 /* Determine if device is available before displaying status */ 2134 if (metaismeta(msp->common.namep)) { 2135 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2136 return (-1); 2137 } 2138 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2139 2140 /* print out "State" to be consistent with other metadevices */ 2141 if (tstate & MD_ABR_CAP) { 2142 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2143 " State: %s - Application Based Recovery (ABR)\n"), 2144 status) == EOF) { 2145 Free(status); 2146 return (mdsyserror(ep, errno, fname)); 2147 } 2148 } else { 2149 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2150 " State: %s\n"), status) == EOF) { 2151 Free(status); 2152 return (mdsyserror(ep, errno, fname)); 2153 } 2154 } 2155 free(status); 2156 2157 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2158 msp->common.size, 2159 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF) 2160 return (mdsyserror(ep, errno, fname)); 2161 2162 /* print component details */ 2163 if (! metaismeta(msp->compnamep)) { 2164 diskaddr_t start_blk; 2165 int has_mddb; 2166 char *has_mddb_str; 2167 2168 /* print header */ 2169 /* 2170 * Building a format string on the fly that will 2171 * be used in (f)printf. This allows the length 2172 * of the ctd to vary from small to large without 2173 * looking horrible. 2174 */ 2175 len = strlen(msp->compnamep->cname); 2176 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2177 len += 2; 2178 if (fprintf(fp, 2179 "\t%-*.*s %-12.12s %-5.5s %s\n", 2180 len, len, 2181 dgettext(TEXT_DOMAIN, "Device"), 2182 dgettext(TEXT_DOMAIN, "Start Block"), 2183 dgettext(TEXT_DOMAIN, "Dbase"), 2184 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2185 return (mdsyserror(ep, errno, fname)); 2186 } 2187 2188 2189 /* get info */ 2190 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2191 MD_DISKADDR_ERROR) 2192 return (-1); 2193 2194 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2195 return (-1); 2196 2197 if (has_mddb) 2198 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2199 else 2200 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2201 2202 /* populate the key in the name_p structure */ 2203 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2204 if (didnp == NULL) { 2205 return (-1); 2206 } 2207 2208 /* determine if devid does NOT exist */ 2209 if (options & PRINT_DEVID) { 2210 if ((dtp = meta_getdidbykey(sp->setno, 2211 getmyside(sp, ep), didnp->key, ep)) == NULL) 2212 devid = dgettext(TEXT_DOMAIN, "No "); 2213 else { 2214 devid = dgettext(TEXT_DOMAIN, "Yes"); 2215 free(dtp); 2216 } 2217 } 2218 2219 /* print info */ 2220 /* 2221 * This allows the length 2222 * of the ctd to vary from small to large without 2223 * looking horrible. 2224 */ 2225 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2226 len, msp->compnamep->cname, 2227 start_blk, has_mddb_str, devid) == EOF) { 2228 return (mdsyserror(ep, errno, fname)); 2229 } 2230 (void) fprintf(fp, "\n"); 2231 } 2232 2233 2234 /* print the headers */ 2235 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2236 dgettext(TEXT_DOMAIN, "Extent"), 2237 dgettext(TEXT_DOMAIN, "Start Block"), 2238 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2239 return (mdsyserror(ep, errno, fname)); 2240 2241 /* print out each extent */ 2242 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2243 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2244 2245 /* If PRINT_TIMES option is ever supported, add output here */ 2246 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2247 extn, extp->poff, extp->len) == EOF) 2248 return (mdsyserror(ep, errno, fname)); 2249 } 2250 2251 /* separate records with a newline */ 2252 (void) fprintf(fp, "\n"); 2253 return (0); 2254 } 2255 2256 /* 2257 * FUNCTION: meta_sp_print() 2258 * INPUT: sp - the set name for the unit being displayed 2259 * np - the name of the device to print 2260 * fname - ??? not used 2261 * fp - the file pointer to send output to 2262 * options - print options from the command line processor 2263 * OUTPUT: ep - return error pointer 2264 * RETURNS: int - -1 if error, 0 on success 2265 * PURPOSE: print a full report of the device specified by metastat. 2266 * This is the main entry point for printing. 2267 */ 2268 int 2269 meta_sp_print( 2270 mdsetname_t *sp, 2271 mdname_t *np, 2272 mdnamelist_t **nlpp, 2273 char *fname, 2274 FILE *fp, 2275 mdprtopts_t options, 2276 md_error_t *ep 2277 ) 2278 { 2279 md_sp_t *msp; 2280 md_unit_t *mdp; 2281 int rval = 0; 2282 set_t setno; 2283 minor_t unit; 2284 2285 /* should always have the same set */ 2286 assert(sp != NULL); 2287 2288 /* print all the soft partitions */ 2289 if (np == NULL) { 2290 mdnamelist_t *nlp = NULL; 2291 mdnamelist_t *p; 2292 int cnt; 2293 2294 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2295 return (-1); 2296 else if (cnt == 0) 2297 return (0); 2298 2299 /* recusively print them out */ 2300 for (p = nlp; (p != NULL); p = p->next) { 2301 mdname_t *curnp = p->namep; 2302 2303 /* 2304 * one problem with the rval of -1 here is that 2305 * the error gets "lost" when the next device is 2306 * printed, but we want to print them all anyway. 2307 */ 2308 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2309 options, ep); 2310 } 2311 2312 /* clean up, return success */ 2313 metafreenamelist(nlp); 2314 return (rval); 2315 } 2316 2317 /* get the unit structure */ 2318 if ((msp = meta_get_sp_common(sp, np, 2319 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2320 return (-1); 2321 2322 /* check for parented */ 2323 if ((! (options & PRINT_SUBDEVS)) && 2324 (MD_HAS_PARENT(msp->common.parent))) { 2325 return (0); 2326 } 2327 2328 /* print appropriate detail */ 2329 if (options & PRINT_SHORT) { 2330 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2331 return (-1); 2332 } else { 2333 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2334 return (-1); 2335 } 2336 2337 /* 2338 * Print underlying metadevices if they are parented to us and 2339 * if the info for the underlying metadevice has not been printed. 2340 */ 2341 if (metaismeta(msp->compnamep)) { 2342 /* get the unit structure for the subdevice */ 2343 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2344 return (-1); 2345 2346 setno = MD_MIN2SET(MD_SID(mdp)); 2347 unit = MD_MIN2UNIT(MD_SID(mdp)); 2348 2349 /* If info not already printed, recurse */ 2350 if (sp_parent_printed[setno] == NULL || 2351 !BT_TEST(sp_parent_printed[setno], unit)) { 2352 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2353 (options | PRINT_HEADER | PRINT_SUBDEVS), 2354 NULL, ep) != 0) { 2355 return (-1); 2356 } 2357 if (sp_parent_printed[setno] == NULL) 2358 sp_parent_printed[setno] = 2359 Zalloc(BT_BITOUL(MD_MAXUNITS)); 2360 BT_SET(sp_parent_printed[setno], unit); 2361 } 2362 } 2363 return (0); 2364 } 2365 2366 /* 2367 * ************************************************************************** 2368 * Watermark Manipulation Functions * 2369 * ************************************************************************** 2370 */ 2371 2372 /* 2373 * FUNCTION: meta_sp_get_start() 2374 * INPUT: sp - the operating set 2375 * np - device upon which the sp is being built 2376 * OUTPUT: ep - return error pointer 2377 * RETURNS: daddr_t - -1 if error, otherwise the start block 2378 * PURPOSE: Encapsulate the determination of the start block of the 2379 * device upon which the sp is built or being built. 2380 */ 2381 static diskaddr_t 2382 meta_sp_get_start( 2383 mdsetname_t *sp, 2384 mdname_t *np, 2385 md_error_t *ep 2386 ) 2387 { 2388 daddr_t start_block; 2389 2390 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) 2391 start_block += MD_SP_START; 2392 2393 return (start_block); 2394 } 2395 2396 /* 2397 * FUNCTION: meta_sp_update_wm_common() 2398 * INPUT: sp - the operating set 2399 * msp - a pointer to the XDR unit structure 2400 * extlist - the extent list specifying watermarks to update 2401 * iocval - either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM 2402 * OUTPUT: ep - return error pointer 2403 * RETURNS: int - -1 if error, 0 on success 2404 * PURPOSE: steps backwards through the extent list updating 2405 * watermarks for all extents with the EXTFLG_UPDATE flag 2406 * set. Writing the watermarks guarantees consistency when 2407 * extents must be broken into pieces since the original 2408 * watermark will be the last to be updated, and will be 2409 * changed to point to a new watermark that is already 2410 * known to be consistent. If one of the writes fails, the 2411 * original watermark stays intact and none of the changes 2412 * are realized. 2413 */ 2414 static int 2415 meta_sp_update_wm_common( 2416 mdsetname_t *sp, 2417 md_sp_t *msp, 2418 sp_ext_node_t *extlist, 2419 int iocval, 2420 md_error_t *ep 2421 ) 2422 { 2423 sp_ext_node_t *ext; 2424 sp_ext_node_t *tail; 2425 mp_watermark_t *wmp, *watermarks; 2426 xsp_offset_t *osp, *offsets; 2427 int update_count = 0; 2428 int rval = 0; 2429 md_unit_t *mdp; 2430 md_sp_update_wm_t update_params; 2431 2432 if (getenv(META_SP_DEBUG)) { 2433 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2434 meta_sp_list_dump(extlist); 2435 } 2436 2437 /* 2438 * find the last node so we can write the watermarks backwards 2439 * and count watermarks to update so we can allocate space 2440 */ 2441 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2442 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2443 update_count++; 2444 } 2445 2446 if (ext->ext_next == NULL) { 2447 tail = ext; 2448 } 2449 } 2450 ext = tail; 2451 2452 wmp = watermarks = 2453 Zalloc(update_count * sizeof (mp_watermark_t)); 2454 osp = offsets = 2455 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2456 2457 while (ext != NULL) { 2458 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2459 /* update watermark */ 2460 wmp->wm_magic = MD_SP_MAGIC; 2461 wmp->wm_version = MD_SP_VERSION; 2462 wmp->wm_type = ext->ext_type; 2463 wmp->wm_seq = ext->ext_seq; 2464 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2465 2466 /* fill in the volume name and set name */ 2467 if (ext->ext_namep != NULL) 2468 (void) strcpy(wmp->wm_mdname, 2469 ext->ext_namep->cname); 2470 else 2471 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2472 if (ext->ext_setp != NULL && 2473 ext->ext_setp->setno != MD_LOCAL_SET) 2474 (void) strcpy(wmp->wm_setname, 2475 ext->ext_setp->setname); 2476 else 2477 (void) strcpy(wmp->wm_setname, 2478 MD_SP_LOCALSETNAME); 2479 2480 /* Generate the checksum */ 2481 wmp->wm_checksum = 0; 2482 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2483 sizeof (*wmp), NULL); 2484 2485 /* record the extent offset */ 2486 *osp = ext->ext_offset; 2487 2488 /* Advance the placeholders */ 2489 osp++; wmp++; 2490 } 2491 ext = ext->ext_prev; 2492 } 2493 2494 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2495 if (mdp == NULL) { 2496 rval = -1; 2497 goto out; 2498 } 2499 2500 (void) memset(&update_params, 0, sizeof (update_params)); 2501 update_params.mnum = MD_SID(mdp); 2502 update_params.count = update_count; 2503 update_params.wmp = (uintptr_t)watermarks; 2504 update_params.osp = (uintptr_t)offsets; 2505 MD_SETDRIVERNAME(&update_params, MD_SP, 2506 MD_MIN2SET(update_params.mnum)); 2507 2508 if (metaioctl(iocval, &update_params, &update_params.mde, 2509 msp->common.namep->cname) != 0) { 2510 (void) mdstealerror(ep, &update_params.mde); 2511 rval = -1; 2512 goto out; 2513 } 2514 2515 out: 2516 Free(watermarks); 2517 Free(offsets); 2518 2519 return (rval); 2520 } 2521 2522 static int 2523 meta_sp_update_wm( 2524 mdsetname_t *sp, 2525 md_sp_t *msp, 2526 sp_ext_node_t *extlist, 2527 md_error_t *ep 2528 ) 2529 { 2530 return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM, 2531 ep)); 2532 } 2533 2534 static int 2535 meta_mn_sp_update_wm( 2536 mdsetname_t *sp, 2537 md_sp_t *msp, 2538 sp_ext_node_t *extlist, 2539 md_error_t *ep 2540 ) 2541 { 2542 return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM, 2543 ep)); 2544 } 2545 2546 /* 2547 * FUNCTION: meta_sp_clear_wm() 2548 * INPUT: sp - the operating set 2549 * msp - the unit structure for the soft partition to clear 2550 * OUTPUT: ep - return error pointer 2551 * RETURNS: int - -1 if error, 0 on success 2552 * PURPOSE: steps through the extents for a soft partition unit and 2553 * creates an extent list designed to mark all of the 2554 * watermarks for those extents as free. The extent list 2555 * is then passed to meta_sp_update_wm() to actually write 2556 * the watermarks out. 2557 */ 2558 static int 2559 meta_sp_clear_wm( 2560 mdsetname_t *sp, 2561 md_sp_t *msp, 2562 md_error_t *ep 2563 ) 2564 { 2565 sp_ext_node_t *extlist = NULL; 2566 int numexts = msp->ext.ext_len; 2567 uint_t i; 2568 int rval = 0; 2569 2570 /* for each watermark must set the flag to SP_FREE */ 2571 for (i = 0; i < numexts; i++) { 2572 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2573 2574 meta_sp_list_insert(NULL, NULL, &extlist, 2575 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2576 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2577 } 2578 2579 /* update watermarks */ 2580 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2581 2582 meta_sp_list_free(&extlist); 2583 return (rval); 2584 } 2585 2586 /* 2587 * FUNCTION: meta_sp_read_wm() 2588 * INPUT: sp - setname for component 2589 * compnp - mdname_t for component 2590 * offset - the offset of the watermark to read (sectors) 2591 * OUTPUT: wm - the watermark structure to read into 2592 * ep - return error pointer 2593 * RETURNS: int - -1 if error, 0 on success 2594 * PURPOSE: seeks out to the requested offset and reads a watermark. 2595 * It then verifies that the magic number is correct and 2596 * that the checksum is valid, returning an error if either 2597 * is wrong. 2598 */ 2599 static int 2600 meta_sp_read_wm( 2601 mdsetname_t *sp, 2602 mdname_t *compnp, 2603 mp_watermark_t *wm, 2604 sp_ext_offset_t offset, 2605 md_error_t *ep 2606 ) 2607 { 2608 md_sp_read_wm_t read_params; 2609 2610 /* 2611 * make sure block offset does not overflow 2^64 bytes and it's a 2612 * multiple of the block size. 2613 */ 2614 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2615 /* LINTED */ 2616 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2617 2618 (void) memset(wm, 0, sizeof (*wm)); 2619 2620 (void) memset(&read_params, 0, sizeof (read_params)); 2621 read_params.rdev = compnp->dev; 2622 read_params.wmp = (uintptr_t)wm; 2623 read_params.offset = offset; 2624 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2625 2626 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2627 &read_params.mde, compnp->cname) != 0) { 2628 2629 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2630 "Extent header read failed, block %llu.\n"), offset); 2631 return (mdstealerror(ep, &read_params.mde)); 2632 } 2633 2634 /* make sure magic number is correct */ 2635 if (wm->wm_magic != MD_SP_MAGIC) { 2636 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2637 "found incorrect magic number %x, expected %x.\n"), 2638 wm->wm_magic, MD_SP_MAGIC); 2639 /* 2640 * Pass NULL for the device name as we don't have 2641 * valid watermark contents. 2642 */ 2643 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2644 } 2645 2646 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2647 sizeof (*wm), NULL)) { 2648 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2649 "found incorrect checksum %x.\n"), 2650 wm->wm_checksum); 2651 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2652 } 2653 2654 return (0); 2655 } 2656 2657 /* 2658 * ************************************************************************** 2659 * Query Functions 2660 * ************************************************************************** 2661 */ 2662 2663 /* 2664 * IMPORTANT NOTE: This is a static function that assumes that 2665 * its input parameters have been checked and 2666 * have valid values that lie within acceptable 2667 * ranges. 2668 * 2669 * FUNCTION: meta_sp_enough_space() 2670 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2671 * must be > 0 2672 * desired_sp_size - the desired soft partition size in blocks; 2673 * must be > 0 2674 * extent_listpp - a reference to a reference to an extent 2675 * list that lists the extents on a device; 2676 * must be a reference to a reference to a 2677 * valid extent list 2678 * alignment - the desired data space alignment for the sp's 2679 * OUTPUT: boolean_t return value 2680 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2681 * list to create the desired soft partitions, 2682 * B_FALSE if there's not enough space 2683 * PURPOSE: determines whether there's enough free space in an extent 2684 * list to allow creation of a set of soft partitions 2685 */ 2686 static boolean_t 2687 meta_sp_enough_space( 2688 int desired_number_of_sps, 2689 blkcnt_t desired_sp_size, 2690 sp_ext_node_t **extent_listpp, 2691 sp_ext_length_t alignment 2692 ) 2693 { 2694 boolean_t enough_space; 2695 int number_of_sps; 2696 int number_of_extents_used; 2697 sp_ext_length_t desired_ext_length = desired_sp_size; 2698 2699 enough_space = B_TRUE; 2700 number_of_sps = 0; 2701 while ((enough_space == B_TRUE) && 2702 (number_of_sps < desired_number_of_sps)) { 2703 /* 2704 * Use the extent allocation algorithm implemented by 2705 * meta_sp_alloc_by_len() to test whether the free 2706 * extents in the extent list referenced by *extent_listpp 2707 * contain enough space to accomodate a soft partition 2708 * of size desired_ext_length. 2709 * 2710 * Repeat the test <desired_number_of_sps> times 2711 * or until it fails, whichever comes first, 2712 * each time allocating the extents required to 2713 * create the soft partition without actually 2714 * creating the soft partition. 2715 */ 2716 number_of_extents_used = meta_sp_alloc_by_len( 2717 TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2718 extent_listpp, &desired_ext_length, 2719 NO_OFFSET, alignment); 2720 if (number_of_extents_used == -1) { 2721 enough_space = B_FALSE; 2722 } else { 2723 number_of_sps++; 2724 } 2725 } 2726 return (enough_space); 2727 } 2728 2729 /* 2730 * IMPORTANT NOTE: This is a static function that calls other functions 2731 * that check its mdsetnamep and device_mdnamep 2732 * input parameters, but expects extent_listpp to 2733 * be a initialized to a valid address to which 2734 * it can write a reference to the extent list that 2735 * it creates. 2736 * 2737 * FUNCTION: meta_sp_get_extent_list() 2738 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2739 * for the set containing the device for 2740 * which the extents are to be listed 2741 * device_mdnamep - a reference to the mdname_t structure 2742 * for the device for which the extents 2743 * are to be listed 2744 * OUTPUT: *extent_listpp - a reference to the extent list for 2745 * the device; NULL if the function fails 2746 * *ep - the libmeta error encountered, if any 2747 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2748 * B_FALSE if not 2749 * PURPOSE: gets the extent list for a device 2750 */ 2751 static boolean_t 2752 meta_sp_get_extent_list( 2753 mdsetname_t *mdsetnamep, 2754 mdname_t *device_mdnamep, 2755 sp_ext_node_t **extent_listpp, 2756 md_error_t *ep 2757 ) 2758 { 2759 diskaddr_t device_size_in_blocks; 2760 mdnamelist_t *sp_name_listp; 2761 diskaddr_t start_block_address_in_blocks; 2762 2763 *extent_listpp = NULL; 2764 sp_name_listp = NULL; 2765 2766 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2767 device_mdnamep, ep); 2768 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2769 if (getenv(META_SP_DEBUG)) { 2770 mde_perror(ep, 2771 "meta_sp_get_extent_list:meta_sp_get_start"); 2772 } 2773 return (B_FALSE); 2774 } 2775 2776 device_size_in_blocks = metagetsize(device_mdnamep, ep); 2777 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2778 if (getenv(META_SP_DEBUG)) { 2779 mde_perror(ep, 2780 "meta_sp_get_extent_list:metagetsize"); 2781 } 2782 return (B_FALSE); 2783 } 2784 2785 /* 2786 * Sanity check: the start block will have skipped an integer 2787 * number of cylinders, C. C will usually be zero. If (C > 0), 2788 * and the disk slice happens to only be C cylinders in total 2789 * size, we'll fail this check. 2790 */ 2791 if (device_size_in_blocks <= 2792 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2793 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2794 return (B_FALSE); 2795 } 2796 2797 /* 2798 * After this point, we will have allocated resources, so any 2799 * failure returns must be through the supplied "fail" label 2800 * to properly deallocate things. 2801 */ 2802 2803 /* 2804 * Create an empty extent list that starts one watermark past 2805 * the start block of the device and ends one watermark before 2806 * the end of the device. 2807 */ 2808 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2809 extent_listpp, NO_OFFSET, 2810 (sp_ext_length_t)start_block_address_in_blocks, 2811 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2812 meta_sp_cmp_by_offset); 2813 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2814 extent_listpp, (sp_ext_offset_t)(device_size_in_blocks - 2815 MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, 2816 NO_FLAGS, meta_sp_cmp_by_offset); 2817 2818 /* 2819 * Get the list of soft partitions that are already on the 2820 * device. 2821 */ 2822 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2823 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2824 if (getenv(META_SP_DEBUG)) { 2825 mde_perror(ep, 2826 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2827 } 2828 goto fail; 2829 } 2830 2831 if (sp_name_listp != NULL) { 2832 /* 2833 * If there are soft partitions on the device, add the 2834 * extents used in them to the extent list. 2835 */ 2836 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2837 extent_listpp, ep) == -1) { 2838 if (getenv(META_SP_DEBUG)) { 2839 mde_perror(ep, "meta_sp_get_extent_list:" 2840 "meta_sp_extlist_from_namelist"); 2841 } 2842 goto fail; 2843 } 2844 metafreenamelist(sp_name_listp); 2845 } 2846 2847 /* 2848 * Add free extents to the extent list to represent 2849 * the remaining regions of free space on the 2850 * device. 2851 */ 2852 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2853 return (B_TRUE); 2854 2855 fail: 2856 if (sp_name_listp != NULL) { 2857 metafreenamelist(sp_name_listp); 2858 } 2859 2860 if (*extent_listpp != NULL) { 2861 /* 2862 * meta_sp_list_free sets *extent_listpp to NULL. 2863 */ 2864 meta_sp_list_free(extent_listpp); 2865 } 2866 return (B_FALSE); 2867 } 2868 2869 /* 2870 * IMPORTANT NOTE: This is a static function that calls other functions 2871 * that check its mdsetnamep and mddrivenamep 2872 * input parameters, but expects extent_listpp to 2873 * be a initialized to a valid address to which 2874 * it can write a reference to the extent list that 2875 * it creates. 2876 * 2877 * FUNCTION: meta_sp_get_extent_list_for_drive() 2878 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2879 * for the set containing the drive for 2880 * which the extents are to be listed 2881 * mddrivenamep - a reference to the mddrivename_t structure 2882 * for the drive for which the extents 2883 * are to be listed 2884 * OUTPUT: *extent_listpp - a reference to the extent list for 2885 * the drive; NULL if the function fails 2886 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2887 * B_FALSE if not 2888 * PURPOSE: gets the extent list for a drive when the entire drive 2889 * is to be soft partitioned 2890 */ 2891 static boolean_t 2892 meta_sp_get_extent_list_for_drive( 2893 mdsetname_t *mdsetnamep, 2894 mddrivename_t *mddrivenamep, 2895 sp_ext_node_t **extent_listpp 2896 ) 2897 { 2898 boolean_t can_use; 2899 diskaddr_t free_space; 2900 md_error_t mderror; 2901 mdvtoc_t proposed_vtoc; 2902 int repartition_options; 2903 int return_value; 2904 md_sp_t test_sp_struct; 2905 2906 can_use = B_TRUE; 2907 *extent_listpp = NULL; 2908 mderror = mdnullerror; 2909 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2910 &mderror); 2911 if (test_sp_struct.compnamep == NULL) { 2912 can_use = B_FALSE; 2913 } 2914 2915 if (can_use == B_TRUE) { 2916 mderror = mdnullerror; 2917 repartition_options = 0; 2918 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2919 MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror); 2920 if (return_value != 0) { 2921 can_use = B_FALSE; 2922 } 2923 } 2924 2925 if (can_use == B_TRUE) { 2926 mderror = mdnullerror; 2927 repartition_options = repartition_options | 2928 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2929 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2930 repartition_options, &proposed_vtoc, &mderror); 2931 if (return_value != 0) { 2932 can_use = B_FALSE; 2933 } 2934 } 2935 2936 if (can_use == B_TRUE) { 2937 free_space = proposed_vtoc.parts[MD_SLICE0].size; 2938 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2939 can_use = B_FALSE; 2940 } 2941 } 2942 2943 if (can_use == B_TRUE) { 2944 /* 2945 * Create an extent list that starts with 2946 * a reserved extent that ends at the start 2947 * of the usable space on slice zero of the 2948 * proposed VTOC, ends with an extent that 2949 * reserves space for a watermark at the end 2950 * of slice zero, and contains a single free 2951 * extent that occupies the rest of the space 2952 * on the slice. 2953 * 2954 * NOTE: 2955 * 2956 * Don't use metagetstart() or metagetsize() to 2957 * find the usable space. They query the mdname_t 2958 * structure that represents an actual device to 2959 * determine the amount of space on the device that 2960 * contains metadata and the total amount of space 2961 * on the device. Since this function creates a 2962 * proposed extent list that doesn't reflect the 2963 * state of an actual device, there's no mdname_t 2964 * structure to be queried. 2965 * 2966 * When a drive is reformatted to prepare for 2967 * soft partitioning, all of slice seven is 2968 * reserved for metadata, all of slice zero is 2969 * available for soft partitioning, and all other 2970 * slices on the drive are empty. The proposed 2971 * extent list for the drive therefore contains 2972 * only three extents: a reserved extent that ends 2973 * at the start of the usable space on slice zero, 2974 * a single free extent that occupies all the usable 2975 * space on slice zero, and an ending extent that 2976 * reserves space for a watermark at the end of 2977 * slice zero. 2978 */ 2979 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2980 extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START), 2981 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2982 meta_sp_cmp_by_offset); 2983 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2984 extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2985 MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS, 2986 meta_sp_cmp_by_offset); 2987 meta_sp_list_freefill(extent_listpp, free_space); 2988 } 2989 return (can_use); 2990 } 2991 2992 /* 2993 * FUNCTION: meta_sp_can_create_sps() 2994 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2995 * for the set containing the device for 2996 * which the extents are to be listed 2997 * mdnamep - a reference to the mdname_t of the device 2998 * on which the soft parititions are to be created 2999 * number_of_sps - the desired number of soft partitions 3000 * sp_size - the desired soft partition size 3001 * OUTPUT: boolean_t return value 3002 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3003 * B_FALSE if not 3004 * PURPOSE: determines whether a set of soft partitions can be created 3005 * on a device 3006 */ 3007 boolean_t 3008 meta_sp_can_create_sps( 3009 mdsetname_t *mdsetnamep, 3010 mdname_t *mdnamep, 3011 int number_of_sps, 3012 blkcnt_t sp_size 3013 ) 3014 { 3015 sp_ext_node_t *extent_listp; 3016 boolean_t succeeded; 3017 md_error_t mde; 3018 3019 if ((number_of_sps > 0) && (sp_size > 0)) { 3020 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3021 &extent_listp, &mde); 3022 } else { 3023 succeeded = B_FALSE; 3024 } 3025 3026 /* 3027 * We don't really care about an error return from the 3028 * alignment call; that will just result in passing zero, 3029 * which will be interpreted as no alignment. 3030 */ 3031 3032 if (succeeded == B_TRUE) { 3033 succeeded = meta_sp_enough_space(number_of_sps, 3034 sp_size, &extent_listp, 3035 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3036 meta_sp_list_free(&extent_listp); 3037 } 3038 return (succeeded); 3039 } 3040 3041 /* 3042 * FUNCTION: meta_sp_can_create_sps_on_drive() 3043 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3044 * for the set containing the drive for 3045 * which the extents are to be listed 3046 * mddrivenamep - a reference to the mddrivename_t of the drive 3047 * on which the soft parititions are to be created 3048 * number_of_sps - the desired number of soft partitions 3049 * sp_size - the desired soft partition size 3050 * OUTPUT: boolean_t return value 3051 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3052 * B_FALSE if not 3053 * PURPOSE: determines whether a set of soft partitions can be created 3054 * on a drive if the entire drive is soft partitioned 3055 */ 3056 boolean_t 3057 meta_sp_can_create_sps_on_drive( 3058 mdsetname_t *mdsetnamep, 3059 mddrivename_t *mddrivenamep, 3060 int number_of_sps, 3061 blkcnt_t sp_size 3062 ) 3063 { 3064 sp_ext_node_t *extent_listp; 3065 boolean_t succeeded; 3066 3067 if ((number_of_sps > 0) && (sp_size > 0)) { 3068 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3069 mddrivenamep, &extent_listp); 3070 } else { 3071 succeeded = B_FALSE; 3072 } 3073 3074 /* 3075 * We don't care about alignment on the space call because 3076 * we're specifically dealing with a drive, which will have no 3077 * inherent alignment. 3078 */ 3079 3080 if (succeeded == B_TRUE) { 3081 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3082 &extent_listp, SP_UNALIGNED); 3083 meta_sp_list_free(&extent_listp); 3084 } 3085 return (succeeded); 3086 } 3087 3088 /* 3089 * FUNCTION: meta_sp_get_free_space() 3090 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3091 * for the set containing the device for 3092 * which the free space is to be returned 3093 * mdnamep - a reference to the mdname_t of the device 3094 * for which the free space is to be returned 3095 * OUTPUT: blkcnt_t return value 3096 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3097 * PURPOSE: returns the number of blocks of free space on a device 3098 */ 3099 blkcnt_t 3100 meta_sp_get_free_space( 3101 mdsetname_t *mdsetnamep, 3102 mdname_t *mdnamep 3103 ) 3104 { 3105 sp_ext_node_t *extent_listp; 3106 sp_ext_length_t free_blocks; 3107 boolean_t succeeded; 3108 md_error_t mde; 3109 3110 extent_listp = NULL; 3111 free_blocks = 0; 3112 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3113 &extent_listp, &mde); 3114 if (succeeded == B_TRUE) { 3115 free_blocks = meta_sp_list_size(extent_listp, 3116 EXTTYP_FREE, INCLUDE_WM); 3117 meta_sp_list_free(&extent_listp); 3118 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3119 /* 3120 * Subtract a safety margin for watermarks when 3121 * computing the number of blocks available for 3122 * use. The actual number of watermarks can't 3123 * be calculated without knowing the exact numbers 3124 * and sizes of both the free extents and the soft 3125 * partitions to be created. The calculation is 3126 * highly complex and error-prone even if those 3127 * quantities are known. The approximate value 3128 * 10 * MD_SP_WMSIZE is within a few blocks of the 3129 * correct value in all practical cases. 3130 */ 3131 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3132 } else { 3133 free_blocks = 0; 3134 } 3135 } else { 3136 mdclrerror(&mde); 3137 } 3138 3139 return (free_blocks); 3140 } 3141 3142 /* 3143 * FUNCTION: meta_sp_get_free_space_on_drive() 3144 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3145 * for the set containing the drive for 3146 * which the free space is to be returned 3147 * mddrivenamep - a reference to the mddrivename_t of the drive 3148 * for which the free space is to be returned 3149 * OUTPUT: blkcnt_t return value 3150 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3151 * PURPOSE: returns the number of blocks of space usable for soft 3152 * partitions on an entire drive, if the entire drive is 3153 * soft partitioned 3154 */ 3155 blkcnt_t 3156 meta_sp_get_free_space_on_drive( 3157 mdsetname_t *mdsetnamep, 3158 mddrivename_t *mddrivenamep 3159 ) 3160 { 3161 sp_ext_node_t *extent_listp; 3162 sp_ext_length_t free_blocks; 3163 boolean_t succeeded; 3164 3165 extent_listp = NULL; 3166 free_blocks = 0; 3167 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3168 mddrivenamep, &extent_listp); 3169 if (succeeded == B_TRUE) { 3170 free_blocks = meta_sp_list_size(extent_listp, 3171 EXTTYP_FREE, INCLUDE_WM); 3172 meta_sp_list_free(&extent_listp); 3173 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3174 /* 3175 * Subtract a safety margin for watermarks when 3176 * computing the number of blocks available for 3177 * use. The actual number of watermarks can't 3178 * be calculated without knowing the exact numbers 3179 * and sizes of both the free extents and the soft 3180 * partitions to be created. The calculation is 3181 * highly complex and error-prone even if those 3182 * quantities are known. The approximate value 3183 * 10 * MD_SP_WMSIZE is within a few blocks of the 3184 * correct value in all practical cases. 3185 */ 3186 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3187 } else { 3188 free_blocks = 0; 3189 } 3190 } 3191 return (free_blocks); 3192 } 3193 3194 /* 3195 * FUNCTION: meta_sp_get_number_of_possible_sps() 3196 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3197 * for the set containing the device for 3198 * which the number of possible soft partitions 3199 * is to be returned 3200 * mdnamep - a reference to the mdname_t of the device 3201 * for which the number of possible soft partitions 3202 * is to be returned 3203 * OUTPUT: int return value 3204 * RETURNS: int - the number of soft partitions of the desired size 3205 * that can be created on the device 3206 * PURPOSE: returns the number of soft partitions of a given size 3207 * that can be created on a device 3208 */ 3209 int 3210 meta_sp_get_number_of_possible_sps( 3211 mdsetname_t *mdsetnamep, 3212 mdname_t *mdnamep, 3213 blkcnt_t sp_size 3214 ) 3215 { 3216 sp_ext_node_t *extent_listp; 3217 int number_of_possible_sps; 3218 boolean_t succeeded; 3219 md_error_t mde; 3220 sp_ext_length_t alignment; 3221 3222 extent_listp = NULL; 3223 number_of_possible_sps = 0; 3224 if (sp_size > 0) { 3225 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3226 mdnamep, &extent_listp, &mde)) == B_FALSE) 3227 mdclrerror(&mde); 3228 } else { 3229 succeeded = B_FALSE; 3230 } 3231 3232 if (succeeded == B_TRUE) { 3233 alignment = meta_sp_get_default_alignment(mdsetnamep, 3234 mdnamep, &mde); 3235 } 3236 3237 while (succeeded == B_TRUE) { 3238 /* 3239 * Keep allocating space from the extent list 3240 * for soft partitions of the desired size until 3241 * there's not enough free space left in the list 3242 * for another soft partiition of that size. 3243 * Add one to the number of possible soft partitions 3244 * for each soft partition for which there is 3245 * enough free space left. 3246 */ 3247 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3248 sp_size, &extent_listp, alignment); 3249 if (succeeded == B_TRUE) { 3250 number_of_possible_sps++; 3251 } 3252 } 3253 if (extent_listp != NULL) { 3254 meta_sp_list_free(&extent_listp); 3255 } 3256 return (number_of_possible_sps); 3257 } 3258 3259 /* 3260 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3261 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3262 * for the set containing the drive for 3263 * which the number of possible soft partitions 3264 * is to be returned 3265 * mddrivenamep - a reference to the mddrivename_t of the drive 3266 * for which the number of possible soft partitions 3267 * is to be returned 3268 * sp_size - the size in blocks of the proposed soft partitions 3269 * OUTPUT: int return value 3270 * RETURNS: int - the number of soft partitions of the desired size 3271 * that can be created on the drive 3272 * PURPOSE: returns the number of soft partitions of a given size 3273 * that can be created on a drive, if the entire drive is 3274 * soft partitioned 3275 */ 3276 int 3277 meta_sp_get_number_of_possible_sps_on_drive( 3278 mdsetname_t *mdsetnamep, 3279 mddrivename_t *mddrivenamep, 3280 blkcnt_t sp_size 3281 ) 3282 { 3283 sp_ext_node_t *extent_listp; 3284 int number_of_possible_sps; 3285 boolean_t succeeded; 3286 3287 extent_listp = NULL; 3288 number_of_possible_sps = 0; 3289 if (sp_size > 0) { 3290 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3291 mddrivenamep, &extent_listp); 3292 } else { 3293 succeeded = B_FALSE; 3294 } 3295 while (succeeded == B_TRUE) { 3296 /* 3297 * Keep allocating space from the extent list 3298 * for soft partitions of the desired size until 3299 * there's not enough free space left in the list 3300 * for another soft partition of that size. 3301 * Add one to the number of possible soft partitions 3302 * for each soft partition for which there is 3303 * enough free space left. 3304 * 3305 * Since it's a drive, not a metadevice, make no 3306 * assumptions about alignment. 3307 */ 3308 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3309 sp_size, &extent_listp, SP_UNALIGNED); 3310 if (succeeded == B_TRUE) { 3311 number_of_possible_sps++; 3312 } 3313 } 3314 if (extent_listp != NULL) { 3315 meta_sp_list_free(&extent_listp); 3316 } 3317 return (number_of_possible_sps); 3318 } 3319 3320 /* 3321 * FUNCTION: meta_sp_get_possible_sp_size() 3322 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3323 * for the set containing the device for 3324 * which the possible soft partition size 3325 * is to be returned 3326 * mdnamep - a reference to the mdname_t of the device 3327 * for which the possible soft partition size 3328 * is to be returned 3329 * number_of_sps - the desired number of soft partitions 3330 * OUTPUT: blkcnt_t return value 3331 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3332 * PURPOSE: returns the maximum possible size of each of a given number of 3333 * soft partitions of equal size that can be created on a device 3334 */ 3335 blkcnt_t 3336 meta_sp_get_possible_sp_size( 3337 mdsetname_t *mdsetnamep, 3338 mdname_t *mdnamep, 3339 int number_of_sps 3340 ) 3341 { 3342 blkcnt_t free_blocks; 3343 blkcnt_t sp_size; 3344 boolean_t succeeded; 3345 3346 sp_size = 0; 3347 if (number_of_sps > 0) { 3348 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3349 sp_size = free_blocks / number_of_sps; 3350 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3351 number_of_sps, sp_size); 3352 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3353 /* 3354 * To compensate for space that may have been 3355 * occupied by watermarks, reduce sp_size by a 3356 * number of blocks equal to the number of soft 3357 * partitions desired, and test again to see 3358 * whether the desired number of soft partitions 3359 * can be created. 3360 */ 3361 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3362 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3363 number_of_sps, sp_size); 3364 } 3365 if (sp_size < 0) { 3366 sp_size = 0; 3367 } 3368 } 3369 return (sp_size); 3370 } 3371 3372 /* 3373 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3374 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3375 * for the set containing the drive for 3376 * which the possible soft partition size 3377 * is to be returned 3378 * mddrivenamep - a reference to the mddrivename_t of the drive 3379 * for which the possible soft partition size 3380 * is to be returned 3381 * number_of_sps - the desired number of soft partitions 3382 * OUTPUT: blkcnt_t return value 3383 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3384 * PURPOSE: returns the maximum possible size of each of a given number of 3385 * soft partitions of equal size that can be created on a drive 3386 * if the entire drive is soft partitioned 3387 */ 3388 blkcnt_t 3389 meta_sp_get_possible_sp_size_on_drive( 3390 mdsetname_t *mdsetnamep, 3391 mddrivename_t *mddrivenamep, 3392 int number_of_sps 3393 ) 3394 { 3395 blkcnt_t free_blocks; 3396 blkcnt_t sp_size; 3397 boolean_t succeeded; 3398 3399 sp_size = 0; 3400 if (number_of_sps > 0) { 3401 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3402 mddrivenamep); 3403 sp_size = free_blocks / number_of_sps; 3404 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3405 mddrivenamep, number_of_sps, sp_size); 3406 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3407 /* 3408 * To compensate for space that may have been 3409 * occupied by watermarks, reduce sp_size by a 3410 * number of blocks equal to the number of soft 3411 * partitions desired, and test again to see 3412 * whether the desired number of soft partitions 3413 * can be created. 3414 */ 3415 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3416 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3417 mddrivenamep, number_of_sps, sp_size); 3418 } 3419 if (sp_size < 0) { 3420 sp_size = 0; 3421 } 3422 } 3423 return (sp_size); 3424 } 3425 3426 /* 3427 * ************************************************************************** 3428 * Unit Structure Manipulation Functions * 3429 * ************************************************************************** 3430 */ 3431 3432 /* 3433 * FUNCTION: meta_sp_fillextarray() 3434 * INPUT: mp - the unit structure to fill 3435 * extlist - the list of extents to fill with 3436 * OUTPUT: none 3437 * RETURNS: void 3438 * PURPOSE: fills in the unit structure extent list with the extents 3439 * specified by extlist. Only extents in extlist with the 3440 * EXTFLG_UPDATE flag are changed in the unit structure, 3441 * and the index into the unit structure is the sequence 3442 * number in the extent list. After all of the nodes have 3443 * been updated the virtual offsets in the unit structure 3444 * are updated to reflect the new lengths. 3445 */ 3446 static void 3447 meta_sp_fillextarray( 3448 mp_unit_t *mp, 3449 sp_ext_node_t *extlist 3450 ) 3451 { 3452 int i; 3453 sp_ext_node_t *ext; 3454 sp_ext_offset_t curvoff = 0LL; 3455 3456 assert(mp != NULL); 3457 3458 /* go through the allocation list and fill in our unit structure */ 3459 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3460 if ((ext->ext_type == EXTTYP_ALLOC) && 3461 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3462 mp->un_ext[ext->ext_seq].un_poff = 3463 ext->ext_offset + MD_SP_WMSIZE; 3464 mp->un_ext[ext->ext_seq].un_len = 3465 ext->ext_length - MD_SP_WMSIZE; 3466 } 3467 } 3468 3469 for (i = 0; i < mp->un_numexts; i++) { 3470 assert(mp->un_ext[i].un_poff != 0); 3471 assert(mp->un_ext[i].un_len != 0); 3472 mp->un_ext[i].un_voff = curvoff; 3473 curvoff += mp->un_ext[i].un_len; 3474 } 3475 } 3476 3477 /* 3478 * FUNCTION: meta_sp_createunit() 3479 * INPUT: np - the name of the device to create a unit structure for 3480 * compnp - the name of the device the soft partition is on 3481 * extlist - the extent list to populate the new unit with 3482 * numexts - the number of extents in the extent list 3483 * len - the total size of the soft partition (sectors) 3484 * status - the initial status of the unit structure 3485 * OUTPUT: ep - return error pointer 3486 * RETURNS: mp_unit_t * - the new unit structure. 3487 * PURPOSE: allocates and fills in a new soft partition unit 3488 * structure to be passed to the soft partitioning driver 3489 * for creation. 3490 */ 3491 static mp_unit_t * 3492 meta_sp_createunit( 3493 mdname_t *np, 3494 mdname_t *compnp, 3495 sp_ext_node_t *extlist, 3496 int numexts, 3497 sp_ext_length_t len, 3498 sp_status_t status, 3499 md_error_t *ep 3500 ) 3501 { 3502 mp_unit_t *mp; 3503 uint_t ms_size; 3504 3505 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3506 (numexts * sizeof (mp->un_ext[0])); 3507 3508 mp = Zalloc(ms_size); 3509 3510 /* fill in fields in common unit structure */ 3511 mp->c.un_type = MD_METASP; 3512 mp->c.un_size = ms_size; 3513 MD_SID(mp) = meta_getminor(np->dev); 3514 mp->c.un_total_blocks = len; 3515 mp->c.un_actual_tb = len; 3516 3517 /* set up geometry */ 3518 (void) meta_sp_setgeom(np, compnp, mp, ep); 3519 3520 /* if we're building on metadevice we can't parent */ 3521 if (metaismeta(compnp)) 3522 MD_CAPAB(mp) = MD_CANT_PARENT; 3523 else 3524 MD_CAPAB(mp) = MD_CAN_PARENT; 3525 3526 /* fill soft partition-specific fields */ 3527 mp->un_dev = compnp->dev; 3528 mp->un_key = compnp->key; 3529 3530 /* mdname_t start_blk field is not 64-bit! */ 3531 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3532 mp->un_status = status; 3533 mp->un_numexts = numexts; 3534 mp->un_length = len; 3535 3536 /* fill in the extent array */ 3537 meta_sp_fillextarray(mp, extlist); 3538 3539 return (mp); 3540 } 3541 3542 /* 3543 * FUNCTION: meta_sp_updateunit() 3544 * INPUT: np - name structure for the metadevice being updated 3545 * old_un - the original unit structure that is being updated 3546 * extlist - the extent list to populate the new unit with 3547 * grow_len - the amount by which the partition is being grown 3548 * numexts - the number of extents in the extent list 3549 * ep - return error pointer 3550 * OUTPUT: none 3551 * RETURNS: mp_unit_t * - the updated unit structure 3552 * PURPOSE: allocates and fills in a new soft partition unit structure to 3553 * be passed to the soft partitioning driver for creation. The 3554 * old unit structure is first copied in, and then the updated 3555 * extents are changed in the new unit structure. This is 3556 * typically used when the size of an existing unit is changed. 3557 */ 3558 static mp_unit_t * 3559 meta_sp_updateunit( 3560 mdname_t *np, 3561 mp_unit_t *old_un, 3562 sp_ext_node_t *extlist, 3563 sp_ext_length_t grow_len, 3564 int numexts, 3565 md_error_t *ep 3566 ) 3567 { 3568 mp_unit_t *new_un; 3569 sp_ext_length_t new_len; 3570 uint_t new_size; 3571 3572 assert(old_un != NULL); 3573 assert(extlist != NULL); 3574 3575 /* allocate new unit structure and copy in old unit */ 3576 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3577 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3578 new_len = old_un->un_length + grow_len; 3579 new_un = Zalloc(new_size); 3580 bcopy(old_un, new_un, old_un->c.un_size); 3581 3582 /* update size and geometry information */ 3583 new_un->c.un_size = new_size; 3584 new_un->un_length = new_len; 3585 new_un->c.un_total_blocks = new_len; 3586 new_un->c.un_actual_tb = new_len; 3587 if (meta_adjust_geom((md_unit_t *)new_un, np, 3588 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3589 0, ep) != 0) { 3590 Free(new_un); 3591 return (NULL); 3592 } 3593 3594 /* update extent information */ 3595 new_un->un_numexts += numexts; 3596 3597 meta_sp_fillextarray(new_un, extlist); 3598 3599 return (new_un); 3600 } 3601 3602 /* 3603 * FUNCTION: meta_get_sp() 3604 * INPUT: sp - the set name for the device to get 3605 * np - the name of the device to get 3606 * OUTPUT: ep - return error pointer 3607 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3608 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3609 * for the named device. Just a wrapper for meta_get_sp_common(). 3610 */ 3611 md_sp_t * 3612 meta_get_sp( 3613 mdsetname_t *sp, 3614 mdname_t *np, 3615 md_error_t *ep 3616 ) 3617 { 3618 return (meta_get_sp_common(sp, np, 0, ep)); 3619 } 3620 3621 /* 3622 * FUNCTION: meta_get_sp_common() 3623 * INPUT: sp - the set name for the device to get 3624 * np - the name of the device to get 3625 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3626 * OUTPUT: ep - return error pointer 3627 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3628 * NULL if np is not a soft partition 3629 * PURPOSE: common routine for fetching a soft partition unit structure 3630 */ 3631 md_sp_t * 3632 meta_get_sp_common( 3633 mdsetname_t *sp, 3634 mdname_t *np, 3635 int fast, 3636 md_error_t *ep 3637 ) 3638 { 3639 mddrivename_t *dnp = np->drivenamep; 3640 char *miscname; 3641 mp_unit_t *mp; 3642 md_sp_t *msp; 3643 int i; 3644 3645 /* must have set */ 3646 assert(sp != NULL); 3647 3648 /* short circuit */ 3649 if (dnp->unitp != NULL) { 3650 if (dnp->unitp->type != MD_METASP) 3651 return (NULL); 3652 return ((md_sp_t *)dnp->unitp); 3653 } 3654 /* get miscname and unit */ 3655 if ((miscname = metagetmiscname(np, ep)) == NULL) 3656 return (NULL); 3657 3658 if (strcmp(miscname, MD_SP) != 0) { 3659 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3660 return (NULL); 3661 } 3662 3663 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3664 return (NULL); 3665 3666 assert(mp->c.un_type == MD_METASP); 3667 3668 /* allocate soft partition */ 3669 msp = Zalloc(sizeof (*msp)); 3670 3671 /* get the common information */ 3672 msp->common.namep = np; 3673 msp->common.type = mp->c.un_type; 3674 msp->common.state = mp->c.un_status; 3675 msp->common.capabilities = mp->c.un_capabilities; 3676 msp->common.parent = mp->c.un_parent; 3677 msp->common.size = mp->c.un_total_blocks; 3678 msp->common.user_flags = mp->c.un_user_flags; 3679 msp->common.revision = mp->c.un_revision; 3680 3681 /* get soft partition information */ 3682 if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3683 goto out; 3684 3685 /* 3686 * Fill in the key and the start block. Note that the start 3687 * block in the unit structure is 64 bits but the name pointer 3688 * only supports 32 bits. 3689 */ 3690 msp->compnamep->key = mp->un_key; 3691 msp->compnamep->start_blk = mp->un_start_blk; 3692 3693 /* fill in status field */ 3694 msp->status = mp->un_status; 3695 3696 /* allocate the extents */ 3697 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3698 msp->ext.ext_len = mp->un_numexts; 3699 3700 /* do the extents for this soft partition */ 3701 for (i = 0; i < mp->un_numexts; i++) { 3702 struct mp_ext *mde = &mp->un_ext[i]; 3703 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3704 3705 extp->voff = mde->un_voff; 3706 extp->poff = mde->un_poff; 3707 extp->len = mde->un_len; 3708 } 3709 3710 /* cleanup, return success */ 3711 Free(mp); 3712 dnp->unitp = (md_common_t *)msp; 3713 return (msp); 3714 3715 out: 3716 /* clean up and return error */ 3717 Free(mp); 3718 Free(msp); 3719 return (NULL); 3720 } 3721 3722 3723 /* 3724 * FUNCTION: meta_init_sp() 3725 * INPUT: spp - the set name for the new device 3726 * argc - the remaining argument count for the metainit cmdline 3727 * argv - the remainder of the unparsed command line 3728 * options - global options parsed by metainit 3729 * OUTPUT: ep - return error pointer 3730 * RETURNS: int - -1 failure, 0 success 3731 * PURPOSE: provides the command line parsing and name management overhead 3732 * for creating a new soft partition. Ultimately this calls 3733 * meta_create_sp() which does the real work of allocating space 3734 * for the new soft partition. 3735 */ 3736 int 3737 meta_init_sp( 3738 mdsetname_t **spp, 3739 int argc, 3740 char *argv[], 3741 mdcmdopts_t options, 3742 md_error_t *ep 3743 ) 3744 { 3745 char *compname = NULL; 3746 mdname_t *spcompnp = NULL; /* name of component volume */ 3747 char *devname = argv[0]; /* unit name */ 3748 mdname_t *np = NULL; /* name of soft partition */ 3749 md_sp_t *msp = NULL; 3750 int c; 3751 int old_optind; 3752 sp_ext_length_t len = 0LL; 3753 int rval = -1; 3754 uint_t seq; 3755 int oflag; 3756 int failed; 3757 mddrivename_t *dnp = NULL; 3758 sp_ext_length_t alignment = 0LL; 3759 sp_ext_node_t *extlist = NULL; 3760 3761 assert(argc > 0); 3762 3763 /* expect sp name, -p, optional -e, compname, and size parameters */ 3764 /* grab soft partition name */ 3765 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3766 goto out; 3767 3768 /* see if it exists already */ 3769 if (metagetmiscname(np, ep) != NULL) { 3770 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3771 meta_getminor(np->dev), devname); 3772 goto out; 3773 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3774 goto out; 3775 } else { 3776 mdclrerror(ep); 3777 } 3778 --argc, ++argv; 3779 3780 if (argc == 0) 3781 goto syntax; 3782 3783 /* grab -p */ 3784 if (strcmp(argv[0], "-p") != 0) 3785 goto syntax; 3786 --argc, ++argv; 3787 3788 if (argc == 0) 3789 goto syntax; 3790 3791 /* see if -e is there */ 3792 if (strcmp(argv[0], "-e") == 0) { 3793 /* use the whole disk */ 3794 options |= MDCMD_USE_WHOLE_DISK; 3795 --argc, ++argv; 3796 } 3797 3798 if (argc == 0) 3799 goto syntax; 3800 3801 /* get component name */ 3802 compname = Strdup(argv[0]); 3803 3804 if (options & MDCMD_USE_WHOLE_DISK) { 3805 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3806 goto out; 3807 } 3808 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3809 goto out; 3810 } 3811 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3812 goto out; 3813 } 3814 assert(*spp != NULL); 3815 3816 if (!(options & MDCMD_NOLOCK)) { 3817 /* grab set lock */ 3818 if (meta_lock(*spp, TRUE, ep)) 3819 goto out; 3820 3821 if (meta_check_ownership(*spp, ep) != 0) 3822 goto out; 3823 } 3824 3825 /* allocate the soft partition */ 3826 msp = Zalloc(sizeof (*msp)); 3827 3828 /* setup common */ 3829 msp->common.namep = np; 3830 msp->common.type = MD_METASP; 3831 3832 compname = spcompnp->cname; 3833 3834 assert(spcompnp->rname != NULL); 3835 --argc, ++argv; 3836 3837 if (argc == 0) { 3838 goto syntax; 3839 } 3840 3841 if (*argv[0] == '-') { 3842 /* 3843 * parse any other command line options, this includes 3844 * the recovery options -o and -b. The special thing 3845 * with these options is that the len needs to be 3846 * kept track of otherwise when the geometry of the 3847 * "device" is built it will create an invalid geometry 3848 */ 3849 old_optind = optind = 0; 3850 opterr = 0; 3851 oflag = 0; 3852 seq = 0; 3853 failed = 0; 3854 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3855 sp_ext_offset_t offset; 3856 sp_ext_length_t length; 3857 longlong_t tmp_size; 3858 3859 switch (c) { 3860 case 'A': /* data alignment */ 3861 if (meta_sp_parsesizestring(optarg, 3862 &alignment) == -1) { 3863 failed = 1; 3864 } 3865 break; 3866 case 'o': /* offset in the partition */ 3867 if (oflag == 1) { 3868 failed = 1; 3869 } else { 3870 tmp_size = atoll(optarg); 3871 if (tmp_size <= 0) { 3872 failed = 1; 3873 } else { 3874 oflag = 1; 3875 options |= MDCMD_DIRECT; 3876 3877 offset = tmp_size; 3878 } 3879 } 3880 3881 break; 3882 case 'b': /* number of blocks */ 3883 if (oflag == 0) { 3884 failed = 1; 3885 } else { 3886 tmp_size = atoll(optarg); 3887 if (tmp_size <= 0) { 3888 failed = 1; 3889 } else { 3890 oflag = 0; 3891 3892 length = tmp_size; 3893 3894 /* we have a pair of values */ 3895 meta_sp_list_insert(*spp, np, 3896 &extlist, offset, length, 3897 EXTTYP_ALLOC, seq++, 3898 EXTFLG_UPDATE, 3899 meta_sp_cmp_by_offset); 3900 len += length; 3901 } 3902 } 3903 3904 break; 3905 default: 3906 argc -= old_optind; 3907 argv += old_optind; 3908 goto options; 3909 } 3910 3911 if (failed) { 3912 argc -= old_optind; 3913 argv += old_optind; 3914 goto syntax; 3915 } 3916 3917 old_optind = optind; 3918 } 3919 argc -= optind; 3920 argv += optind; 3921 3922 /* 3923 * Must have matching pairs of -o and -b flags 3924 */ 3925 if (oflag != 0) 3926 goto syntax; 3927 3928 /* 3929 * Can't specify both layout (indicated indirectly by 3930 * len being set by thye -o/-b cases above) AND 3931 * alignment 3932 */ 3933 if ((len > 0LL) && (alignment > 0LL)) 3934 goto syntax; 3935 3936 /* 3937 * sanity check the allocation list 3938 */ 3939 if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3940 goto syntax; 3941 } 3942 3943 if (len == 0LL) { 3944 if (argc == 0) 3945 goto syntax; 3946 if (meta_sp_parsesize(argv[0], &len) == -1) 3947 goto syntax; 3948 --argc, ++argv; 3949 } 3950 3951 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3952 msp->ext.ext_val->len = len; 3953 msp->compnamep = spcompnp; 3954 3955 /* we should be at the end */ 3956 if (argc != 0) 3957 goto syntax; 3958 3959 /* create soft partition */ 3960 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3961 goto out; 3962 rval = 0; 3963 3964 /* let em know */ 3965 if (options & MDCMD_PRINT) { 3966 (void) printf(dgettext(TEXT_DOMAIN, 3967 "%s: Soft Partition is setup\n"), 3968 devname); 3969 (void) fflush(stdout); 3970 } 3971 goto out; 3972 3973 syntax: 3974 /* syntax error */ 3975 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3976 goto out; 3977 3978 options: 3979 /* options error */ 3980 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3981 goto out; 3982 3983 out: 3984 if (msp != NULL) { 3985 if (msp->ext.ext_val != NULL) { 3986 Free(msp->ext.ext_val); 3987 } 3988 Free(msp); 3989 } 3990 3991 return (rval); 3992 } 3993 3994 /* 3995 * FUNCTION: meta_free_sp() 3996 * INPUT: msp - the soft partition unit to free 3997 * OUTPUT: none 3998 * RETURNS: void 3999 * PURPOSE: provides an interface from the rest of libmeta for freeing a 4000 * soft partition unit 4001 */ 4002 void 4003 meta_free_sp(md_sp_t *msp) 4004 { 4005 Free(msp); 4006 } 4007 4008 /* 4009 * FUNCTION: meta_sp_issp() 4010 * INPUT: sp - the set name to check 4011 * np - the name to check 4012 * OUTPUT: ep - return error pointer 4013 * RETURNS: int - 0 means sp,np is a soft partition 4014 * 1 means sp,np is not a soft partition 4015 * PURPOSE: determines whether the given device is a soft partition 4016 * device. This is called by other metadevice check routines. 4017 */ 4018 int 4019 meta_sp_issp( 4020 mdsetname_t *sp, 4021 mdname_t *np, 4022 md_error_t *ep 4023 ) 4024 { 4025 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 4026 return (1); 4027 4028 return (0); 4029 } 4030 4031 /* 4032 * FUNCTION: meta_check_sp() 4033 * INPUT: sp - the set name to check 4034 * msp - the unit structure to check 4035 * options - creation options 4036 * OUTPUT: repart_options - options to be passed to 4037 * meta_repartition_drive() 4038 * ep - return error pointer 4039 * RETURNS: int - 0 ok to create on this component 4040 * -1 error or not ok to create on this component 4041 * PURPOSE: Checks to determine whether the rules for creation of 4042 * soft partitions allow creation of a soft partition on 4043 * the device described by the mdname_t structure referred 4044 * to by msp->compnamep. 4045 * 4046 * NOTE: Does NOT check to determine whether the extents 4047 * described in the md_sp_t structure referred to by 4048 * msp will fit on the device described by the mdname_t 4049 * structure located at msp->compnamep. 4050 */ 4051 static int 4052 meta_check_sp( 4053 mdsetname_t *sp, 4054 md_sp_t *msp, 4055 mdcmdopts_t options, 4056 int *repart_options, 4057 md_error_t *ep 4058 ) 4059 { 4060 md_common_t *mdp; 4061 mdname_t *compnp = msp->compnamep; 4062 uint_t slice; 4063 mddrivename_t *dnp; 4064 mdname_t *slicenp; 4065 mdvtoc_t *vtocp; 4066 4067 /* make sure it is in the set */ 4068 if (meta_check_inset(sp, compnp, ep) != 0) 4069 return (-1); 4070 4071 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4072 uint_t rep_slice; 4073 4074 /* 4075 * check to make sure we can partition this drive. 4076 * we cannot continue if any of the following are 4077 * true: 4078 * The drive is a metadevice. 4079 * The drive contains a mounted slice. 4080 * The drive contains a slice being swapped to. 4081 * The drive contains slices which are part of other 4082 * metadevices. 4083 * The drive contains a metadb. 4084 */ 4085 if (metaismeta(compnp)) 4086 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4087 compnp->cname)); 4088 4089 assert(compnp->drivenamep != NULL); 4090 4091 /* 4092 * ensure that we have slice 0 since the disk will be 4093 * repartitioned in the USE_WHOLE_DISK case. this check 4094 * is redundant unless the user incorrectly specifies a 4095 * a fully qualified drive AND slice name (i.e., 4096 * /dev/dsk/cXtXdXsX), which will be incorrectly 4097 * recognized as a drive name by the metaname code. 4098 */ 4099 4100 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4101 return (-1); 4102 if (slice != MD_SLICE0) 4103 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4104 4105 dnp = compnp->drivenamep; 4106 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4107 return (-1); 4108 4109 for (slice = 0; slice < vtocp->nparts; slice++) { 4110 4111 /* only check if the slice really exists */ 4112 if (vtocp->parts[slice].size == 0) 4113 continue; 4114 4115 slicenp = metaslicename(dnp, slice, ep); 4116 if (slicenp == NULL) 4117 return (-1); 4118 4119 /* check to ensure that it is not already in use */ 4120 if (meta_check_inuse(sp, 4121 slicenp, MDCHK_INUSE, ep) != 0) { 4122 return (-1); 4123 } 4124 4125 /* 4126 * Up to this point, tests are applied to all 4127 * slices uniformly. 4128 */ 4129 4130 if (slice == rep_slice) { 4131 /* 4132 * Tests inside the body of this 4133 * conditional are applied only to 4134 * slice seven. 4135 */ 4136 if (meta_check_inmeta(sp, slicenp, 4137 options | MDCHK_ALLOW_MDDB | 4138 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4139 return (-1); 4140 4141 /* 4142 * For slice seven, a metadb is NOT an 4143 * automatic failure. It merely means 4144 * that we're not allowed to muck 4145 * about with the partitioning of that 4146 * slice. We indicate this by masking 4147 * in the MD_REPART_LEAVE_REP flag. 4148 */ 4149 if (metahasmddb(sp, slicenp, ep)) { 4150 assert(repart_options != 4151 NULL); 4152 *repart_options |= 4153 MD_REPART_LEAVE_REP; 4154 } 4155 4156 /* 4157 * Skip the remaining tests for slice 4158 * seven 4159 */ 4160 continue; 4161 } 4162 4163 /* 4164 * Tests below this point will be applied to 4165 * all slices EXCEPT for the replica slice. 4166 */ 4167 4168 4169 /* check if component is in a metadevice */ 4170 if (meta_check_inmeta(sp, slicenp, options, 0, 4171 -1, ep) != 0) 4172 return (-1); 4173 4174 /* check to see if component has a metadb */ 4175 if (metahasmddb(sp, slicenp, ep)) 4176 return (mddeverror(ep, MDE_HAS_MDDB, 4177 slicenp->dev, slicenp->cname)); 4178 } 4179 /* 4180 * This should be all of the testing necessary when 4181 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4182 * meta_check_sp() is oriented towards component 4183 * arguments instead of disks. 4184 */ 4185 goto meta_check_sp_ok; 4186 4187 } 4188 4189 /* check to ensure that it is not already in use */ 4190 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4191 return (-1); 4192 } 4193 4194 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4195 4196 /* 4197 * The component can have one or more soft partitions on it 4198 * already, but can't be part of any other type of metadevice, 4199 * so if it is used for a metadevice, but the metadevice 4200 * isn't a soft partition, return failure. 4201 */ 4202 4203 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4204 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4205 return (-1); 4206 } 4207 } else { /* handle metadevices */ 4208 /* get underlying unit & check capabilities */ 4209 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4210 return (-1); 4211 4212 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4213 (! (mdp->capabilities & MD_CAN_SP))) 4214 return (mdmderror(ep, MDE_INVAL_UNIT, 4215 meta_getminor(compnp->dev), compnp->cname)); 4216 } 4217 4218 meta_check_sp_ok: 4219 mdclrerror(ep); 4220 return (0); 4221 } 4222 4223 /* 4224 * FUNCTION: meta_create_sp() 4225 * INPUT: sp - the set name to create in 4226 * msp - the unit structure to create 4227 * oblist - an optional list of requested extents (-o/-b options) 4228 * options - creation options 4229 * alignment - data alignment 4230 * OUTPUT: ep - return error pointer 4231 * RETURNS: int - 0 success, -1 error 4232 * PURPOSE: does most of the work for creating a soft partition. If 4233 * metainit -p -e was used, first partition the drive. Then 4234 * create an extent list based on the existing soft partitions 4235 * and assume all space not used by them is free. Storage for 4236 * the new soft partition is allocated from the free extents 4237 * based on the length specified on the command line or the 4238 * oblist passed in. The unit structure is then committed and 4239 * the watermarks are updated. Finally, the status is changed to 4240 * Okay and the process is complete. 4241 */ 4242 static int 4243 meta_create_sp( 4244 mdsetname_t *sp, 4245 md_sp_t *msp, 4246 sp_ext_node_t *oblist, 4247 mdcmdopts_t options, 4248 sp_ext_length_t alignment, 4249 md_error_t *ep 4250 ) 4251 { 4252 mdname_t *np = msp->common.namep; 4253 mdname_t *compnp = msp->compnamep; 4254 mp_unit_t *mp = NULL; 4255 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4256 md_set_params_t set_params; 4257 int rval = -1; 4258 diskaddr_t comp_size; 4259 diskaddr_t sp_start; 4260 sp_ext_node_t *extlist = NULL; 4261 int numexts = 0; /* number of extents */ 4262 int count = 0; 4263 int committed = 0; 4264 int repart_options = MD_REPART_FORCE; 4265 int create_flag = MD_CRO_32BIT; 4266 int mn_set_master = 0; 4267 4268 md_set_desc *sd; 4269 md_set_mmown_params_t *ownpar = NULL; 4270 int comp_is_mirror = 0; 4271 4272 /* validate soft partition */ 4273 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4274 return (-1); 4275 4276 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4277 if ((options & MDCMD_DOIT) != 0) { 4278 if (meta_repartition_drive(sp, 4279 compnp->drivenamep, 4280 repart_options, 4281 NULL, /* Don't return the VTOC */ 4282 ep) != 0) 4283 4284 return (-1); 4285 } else { 4286 /* 4287 * If -n and -e are both specified, it doesn't make 4288 * sense to continue without actually partitioning 4289 * the drive. 4290 */ 4291 return (0); 4292 } 4293 } 4294 4295 /* populate the start_blk field of the component name */ 4296 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4297 MD_DISKADDR_ERROR) { 4298 rval = -1; 4299 goto out; 4300 } 4301 4302 if (options & MDCMD_DOIT) { 4303 /* store name in namespace */ 4304 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4305 rval = -1; 4306 goto out; 4307 } 4308 } 4309 4310 /* 4311 * Get a list of the soft partitions that currently reside on 4312 * the component. We should ALWAYS force reload the cache, 4313 * because if this is a single creation, there will not BE a 4314 * cached list, and if we're using the md.tab, we must rebuild 4315 * the list because it won't contain the previous (if any) 4316 * soft partition. 4317 */ 4318 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4319 if (count < 0) { 4320 /* error occured */ 4321 rval = -1; 4322 goto out; 4323 } 4324 4325 /* 4326 * get the size of the underlying device. if the size is smaller 4327 * than or equal to the watermark size, we know there isn't 4328 * enough space. 4329 */ 4330 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4331 rval = -1; 4332 goto out; 4333 } else if (comp_size <= MD_SP_WMSIZE) { 4334 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4335 rval = -1; 4336 goto out; 4337 } 4338 /* 4339 * seed extlist with reserved space at the beginning of the volume and 4340 * enough space for the end watermark. The end watermark always gets 4341 * updated, but if the underlying device changes size it may not be 4342 * pointed to until the extent before it is updated. Since the 4343 * end of the reserved space is where the first watermark starts, 4344 * the reserved extent should never be marked for updating. 4345 */ 4346 4347 meta_sp_list_insert(NULL, NULL, &extlist, 4348 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4349 meta_sp_list_insert(NULL, NULL, &extlist, 4350 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4351 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4352 4353 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4354 rval = -1; 4355 goto out; 4356 } 4357 4358 metafreenamelist(spnlp); 4359 4360 if (getenv(META_SP_DEBUG)) { 4361 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4362 meta_sp_list_dump(extlist); 4363 } 4364 4365 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4366 4367 /* get extent list from -o/-b options or from free space */ 4368 if (options & MDCMD_DIRECT) { 4369 if (getenv(META_SP_DEBUG)) { 4370 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4371 meta_sp_list_dump(oblist); 4372 } 4373 4374 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4375 if (numexts == -1) { 4376 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4377 rval = -1; 4378 goto out; 4379 } 4380 } else { 4381 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4382 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4383 meta_sp_get_default_alignment(sp, compnp, ep)); 4384 if (numexts == -1) { 4385 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4386 rval = -1; 4387 goto out; 4388 } 4389 } 4390 4391 assert(extlist != NULL); 4392 4393 /* create soft partition */ 4394 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4395 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4396 4397 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4398 4399 /* if we're not doing anything (metainit -n), return success */ 4400 if (! (options & MDCMD_DOIT)) { 4401 rval = 0; /* success */ 4402 goto out; 4403 } 4404 4405 (void) memset(&set_params, 0, sizeof (set_params)); 4406 4407 if (create_flag == MD_CRO_64BIT) { 4408 mp->c.un_revision |= MD_64BIT_META_DEV; 4409 set_params.options = MD_CRO_64BIT; 4410 } else { 4411 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4412 set_params.options = MD_CRO_32BIT; 4413 } 4414 4415 if (getenv(META_SP_DEBUG)) { 4416 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4417 meta_sp_printunit(mp); 4418 } 4419 4420 /* 4421 * Check to see if we're trying to create a partition on a mirror. If so 4422 * we may have to enforce an ownership change before writing the 4423 * watermark out. 4424 */ 4425 if (metaismeta(compnp)) { 4426 char *miscname; 4427 4428 miscname = metagetmiscname(compnp, ep); 4429 if (miscname != NULL) 4430 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4431 else 4432 comp_is_mirror = 0; 4433 } else { 4434 comp_is_mirror = 0; 4435 } 4436 4437 /* 4438 * For a multi-node environment we have to ensure that the master 4439 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4440 * If the master does not own the device we will deadlock as the 4441 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4442 * ownership change that will block as the MD_IOCSET is still in 4443 * progress. To close this window we force an owner change to occur 4444 * before issuing the MD_IOCSET. We cannot simply open the device and 4445 * write to it as this will only work for the first soft-partition 4446 * creation. 4447 */ 4448 4449 if (comp_is_mirror && !metaislocalset(sp)) { 4450 4451 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4452 rval = -1; 4453 goto out; 4454 } 4455 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4456 mn_set_master = 1; 4457 } 4458 } 4459 4460 set_params.mnum = MD_SID(mp); 4461 set_params.size = mp->c.un_size; 4462 set_params.mdp = (uintptr_t)mp; 4463 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4464 4465 /* first phase of commit. */ 4466 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4467 np->cname) != 0) { 4468 (void) mdstealerror(ep, &set_params.mde); 4469 rval = -1; 4470 goto out; 4471 } 4472 4473 /* we've successfully committed the record */ 4474 committed = 1; 4475 4476 /* write watermarks */ 4477 /* 4478 * Special-case for Multi-node sets. As we now have a distributed DRL 4479 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case 4480 * unless we use a 'special' MN-capable ioctl to stage the watermark 4481 * update. This only affects the master-node in an MN set. 4482 */ 4483 if (mn_set_master) { 4484 if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) { 4485 rval = -1; 4486 goto out; 4487 } 4488 } else { 4489 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4490 rval = -1; 4491 goto out; 4492 } 4493 } 4494 4495 /* second phase of commit, set status to MD_SP_OK */ 4496 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4497 rval = -1; 4498 goto out; 4499 } 4500 rval = 0; 4501 out: 4502 Free(mp); 4503 if (ownpar) 4504 Free(ownpar); 4505 4506 if (extlist != NULL) 4507 meta_sp_list_free(&extlist); 4508 4509 if (rval != 0 && keynlp != NULL && committed != 1) 4510 (void) del_key_names(sp, keynlp, NULL); 4511 4512 metafreenamelist(keynlp); 4513 4514 return (rval); 4515 } 4516 4517 /* 4518 * ************************************************************************** 4519 * Reset (metaclear) Functions * 4520 * ************************************************************************** 4521 */ 4522 4523 /* 4524 * FUNCTION: meta_sp_reset_common() 4525 * INPUT: sp - the set name of the device to reset 4526 * np - the name of the device to reset 4527 * msp - the unit structure to reset 4528 * options - metaclear options 4529 * OUTPUT: ep - return error pointer 4530 * RETURNS: int - 0 success, -1 error 4531 * PURPOSE: "resets", or more accurately deletes, the soft partition 4532 * specified. First the state is set to "deleting" and then the 4533 * watermarks are all cleared out. Once the watermarks have been 4534 * updated, the unit structure is deleted from the metadb. 4535 */ 4536 static int 4537 meta_sp_reset_common( 4538 mdsetname_t *sp, 4539 mdname_t *np, 4540 md_sp_t *msp, 4541 md_sp_reset_t reset_params, 4542 mdcmdopts_t options, 4543 md_error_t *ep 4544 ) 4545 { 4546 char *miscname; 4547 int rval = -1; 4548 int is_open = 0; 4549 4550 /* make sure that nobody owns us */ 4551 if (MD_HAS_PARENT(msp->common.parent)) 4552 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4553 np->cname)); 4554 4555 /* make sure that the soft partition isn't open */ 4556 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4557 return (-1); 4558 else if (is_open) 4559 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4560 np->cname)); 4561 4562 /* get miscname */ 4563 if ((miscname = metagetmiscname(np, ep)) == NULL) 4564 return (-1); 4565 4566 /* fill in reset params */ 4567 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4568 reset_params.mnum = meta_getminor(np->dev); 4569 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4570 4571 /* 4572 * clear soft partition - phase one. 4573 * place the soft partition into the "delete pending" state. 4574 */ 4575 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4576 return (-1); 4577 4578 /* 4579 * Now clear the watermarks. If the force flag is specified, 4580 * ignore any errors writing the watermarks and delete the unit 4581 * structure anyway. An error may leave the on-disk format in a 4582 * corrupt state. If force is not specified and we fail here, 4583 * the soft partition will remain in the "delete pending" state. 4584 */ 4585 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4586 ((options & MDCMD_FORCE) == 0)) 4587 goto out; 4588 4589 /* 4590 * clear soft partition - phase two. 4591 * the driver removes the soft partition from the metadb and 4592 * zeros out incore version. 4593 */ 4594 if (metaioctl(MD_IOCRESET, &reset_params, 4595 &reset_params.mde, np->cname) != 0) { 4596 (void) mdstealerror(ep, &reset_params.mde); 4597 goto out; 4598 } 4599 4600 /* 4601 * Wait for the /dev to be cleaned up. Ignore the return 4602 * value since there's not much we can do. 4603 */ 4604 (void) meta_update_devtree(meta_getminor(np->dev)); 4605 4606 rval = 0; /* success */ 4607 4608 if (options & MDCMD_PRINT) { 4609 (void) printf(dgettext(TEXT_DOMAIN, 4610 "%s: Soft Partition is cleared\n"), 4611 np->cname); 4612 (void) fflush(stdout); 4613 } 4614 4615 /* 4616 * if told to recurse and on a metadevice, then attempt to 4617 * clear the subdevices. Indicate failure if the clear fails. 4618 */ 4619 if ((options & MDCMD_RECURSE) && 4620 (metaismeta(msp->compnamep)) && 4621 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4622 rval = -1; 4623 4624 out: 4625 meta_invalidate_name(np); 4626 return (rval); 4627 } 4628 4629 /* 4630 * FUNCTION: meta_sp_reset() 4631 * INPUT: sp - the set name of the device to reset 4632 * np - the name of the device to reset 4633 * options - metaclear options 4634 * OUTPUT: ep - return error pointer 4635 * RETURNS: int - 0 success, -1 error 4636 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4637 * soft partition. If np is NULL, then soft partitions are 4638 * all deleted at the current level and then recursively deleted. 4639 * Otherwise, if a name is specified either directly or as a 4640 * result of a recursive operation, it deletes only that name. 4641 * Since something sitting under a soft partition may be parented 4642 * to it, we have to reparent that other device to another soft 4643 * partition on the same component if we're deleting the one it's 4644 * parented to. 4645 */ 4646 int 4647 meta_sp_reset( 4648 mdsetname_t *sp, 4649 mdname_t *np, 4650 mdcmdopts_t options, 4651 md_error_t *ep 4652 ) 4653 { 4654 md_sp_t *msp; 4655 int rval = -1; 4656 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4657 md_sp_reset_t reset_params; 4658 int num_sp; 4659 4660 assert(sp != NULL); 4661 4662 /* reset/delete all soft paritions */ 4663 if (np == NULL) { 4664 /* 4665 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4666 * is incorrect for soft partitions. We want to clear 4667 * all soft partitions at a particular level in the 4668 * metadevice stack before moving to the next level. 4669 * Thus, we clear MDCMD_RECURSE from the options. 4670 */ 4671 options &= ~MDCMD_RECURSE; 4672 4673 /* for each soft partition */ 4674 rval = 0; 4675 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4676 rval = -1; 4677 4678 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4679 np = nlp->namep; 4680 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4681 rval = -1; 4682 break; 4683 } 4684 /* 4685 * meta_reset_all calls us twice to get soft 4686 * partitions at the top and bottom of the stack. 4687 * thus, if we have a parent, we'll get deleted 4688 * on the next call. 4689 */ 4690 if (MD_HAS_PARENT(msp->common.parent)) 4691 continue; 4692 /* 4693 * If this is a multi-node set, we send a series 4694 * of individual metaclear commands. 4695 */ 4696 if (meta_is_mn_set(sp, ep)) { 4697 if (meta_mn_send_metaclear_command(sp, 4698 np->cname, options, 0, ep) != 0) { 4699 rval = -1; 4700 break; 4701 } 4702 } else { 4703 if (meta_sp_reset(sp, np, options, ep) != 0) { 4704 rval = -1; 4705 break; 4706 } 4707 } 4708 } 4709 /* cleanup return status */ 4710 metafreenamelist(spnlp); 4711 return (rval); 4712 } 4713 4714 /* check the name */ 4715 if (metachkmeta(np, ep) != 0) 4716 return (-1); 4717 4718 /* get the unit structure */ 4719 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4720 return (-1); 4721 4722 /* clear out reset parameters */ 4723 (void) memset(&reset_params, 0, sizeof (reset_params)); 4724 4725 /* if our child is a metadevice, we need to deparent/reparent it */ 4726 if (metaismeta(msp->compnamep)) { 4727 /* get sp's on this component */ 4728 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4729 &spnlp, 1, ep)) <= 0) 4730 /* no sp's on this device. error! */ 4731 return (-1); 4732 else if (num_sp == 1) 4733 /* last sp on this device, so we deparent */ 4734 reset_params.new_parent = MD_NO_PARENT; 4735 else { 4736 /* have to reparent this metadevice */ 4737 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4738 if (meta_getminor(nlp->namep->dev) == 4739 meta_getminor(np->dev)) 4740 continue; 4741 /* 4742 * this isn't the softpart we are deleting, 4743 * so use this device as the new parent. 4744 */ 4745 reset_params.new_parent = 4746 meta_getminor(nlp->namep->dev); 4747 break; 4748 } 4749 } 4750 metafreenamelist(spnlp); 4751 } 4752 4753 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4754 return (-1); 4755 4756 return (0); 4757 } 4758 4759 /* 4760 * FUNCTION: meta_sp_reset_component() 4761 * INPUT: sp - the set name of the device to reset 4762 * name - the string name of the device to reset 4763 * options - metaclear options 4764 * OUTPUT: ep - return error pointer 4765 * RETURNS: int - 0 success, -1 error 4766 * PURPOSE: provides the ability to delete all soft partitions on a 4767 * specified device (metaclear -p). It first gets all of the 4768 * soft partitions on the component and then deletes each one 4769 * individually. 4770 */ 4771 int 4772 meta_sp_reset_component( 4773 mdsetname_t *sp, 4774 char *name, 4775 mdcmdopts_t options, 4776 md_error_t *ep 4777 ) 4778 { 4779 mdname_t *compnp, *np; 4780 mdnamelist_t *spnlp = NULL; 4781 mdnamelist_t *nlp = NULL; 4782 md_sp_t *msp; 4783 int count; 4784 md_sp_reset_t reset_params; 4785 4786 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4787 return (-1); 4788 4789 /* If we're starting out with no soft partitions, it's an error */ 4790 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4791 if (count == 0) 4792 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4793 else if (count < 0) 4794 return (-1); 4795 4796 /* 4797 * clear all soft partitions on this component. 4798 * NOTE: we reparent underlying metadevices as we go so that 4799 * things stay sane. Also, if we encounter an error, we stop 4800 * and go no further in case recovery might be needed. 4801 */ 4802 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4803 /* clear out reset parameters */ 4804 (void) memset(&reset_params, 0, sizeof (reset_params)); 4805 4806 /* check the name */ 4807 np = nlp->namep; 4808 4809 if (metachkmeta(np, ep) != 0) { 4810 metafreenamelist(spnlp); 4811 return (-1); 4812 } 4813 4814 /* get the unit structure */ 4815 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4816 metafreenamelist(spnlp); 4817 return (-1); 4818 } 4819 4820 /* have to deparent/reparent metadevices */ 4821 if (metaismeta(compnp)) { 4822 if (nlp->next == NULL) 4823 reset_params.new_parent = MD_NO_PARENT; 4824 else 4825 reset_params.new_parent = 4826 meta_getminor(spnlp->next->namep->dev); 4827 } 4828 4829 /* clear soft partition */ 4830 if (meta_sp_reset_common(sp, np, msp, reset_params, 4831 options, ep) < 0) { 4832 metafreenamelist(spnlp); 4833 return (-1); 4834 } 4835 } 4836 metafreenamelist(spnlp); 4837 return (0); 4838 } 4839 4840 /* 4841 * ************************************************************************** 4842 * Grow (metattach) Functions * 4843 * ************************************************************************** 4844 */ 4845 4846 /* 4847 * FUNCTION: meta_sp_attach() 4848 * INPUT: sp - the set name of the device to attach to 4849 * np - the name of the device to attach to 4850 * addsize - the unparsed string holding the amount of space to add 4851 * options - metattach options 4852 * alignment - data alignment 4853 * OUTPUT: ep - return error pointer 4854 * RETURNS: int - 0 success, -1 error 4855 * PURPOSE: grows a soft partition by reading in the existing unit 4856 * structure and setting its state to Growing, allocating more 4857 * space (similar to meta_create_sp()), updating the watermarks, 4858 * and then writing out the new unit structure in the Okay state. 4859 */ 4860 int 4861 meta_sp_attach( 4862 mdsetname_t *sp, 4863 mdname_t *np, 4864 char *addsize, 4865 mdcmdopts_t options, 4866 sp_ext_length_t alignment, 4867 md_error_t *ep 4868 ) 4869 { 4870 md_grow_params_t grow_params; 4871 sp_ext_length_t grow_len; /* amount to grow */ 4872 mp_unit_t *mp, *new_un; 4873 mdname_t *compnp = NULL; 4874 4875 sp_ext_node_t *extlist = NULL; 4876 int numexts; 4877 mdnamelist_t *spnlp = NULL; 4878 int count; 4879 md_sp_t *msp; 4880 daddr_t start_block; 4881 4882 /* should have the same set */ 4883 assert(sp != NULL); 4884 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4885 4886 /* check name */ 4887 if (metachkmeta(np, ep) != 0) 4888 return (-1); 4889 4890 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4891 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4892 } 4893 4894 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4895 return (-1); 4896 4897 /* make sure we don't have a parent */ 4898 if (MD_HAS_PARENT(mp->c.un_parent)) { 4899 Free(mp); 4900 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4901 } 4902 4903 if (getenv(META_SP_DEBUG)) { 4904 meta_sp_debug("meta_sp_attach: Unit structure before new " 4905 "space:\n"); 4906 meta_sp_printunit(mp); 4907 } 4908 4909 /* 4910 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4911 * If this was not the case we would suffer the following 4912 * assertion failure: 4913 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4914 * file meta_check.x, line 315 4915 * I guess this is because we have not "seen" this drive before 4916 * and hence hit the failure - this is of course the attach routine 4917 */ 4918 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4919 Free(mp); 4920 return (-1); 4921 } 4922 4923 /* metakeyname does not fill in the key. */ 4924 compnp->key = mp->un_key; 4925 4926 /* work out the space on the component that we are dealing with */ 4927 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4928 4929 /* 4930 * see if the component has been soft partitioned yet, or if an 4931 * error occurred. 4932 */ 4933 if (count == 0) { 4934 Free(mp); 4935 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4936 } else if (count < 0) { 4937 Free(mp); 4938 return (-1); 4939 } 4940 4941 /* 4942 * seed extlist with reserved space at the beginning of the volume and 4943 * enough space for the end watermark. The end watermark always gets 4944 * updated, but if the underlying device changes size it may not be 4945 * pointed to until the extent before it is updated. Since the 4946 * end of the reserved space is where the first watermark starts, 4947 * the reserved extent should never be marked for updating. 4948 */ 4949 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4950 MD_DISKADDR_ERROR) { 4951 Free(mp); 4952 return (-1); 4953 } 4954 4955 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4956 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4957 meta_sp_list_insert(NULL, NULL, &extlist, 4958 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4959 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4960 4961 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4962 Free(mp); 4963 return (-1); 4964 } 4965 4966 metafreenamelist(spnlp); 4967 4968 if (getenv(META_SP_DEBUG)) { 4969 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4970 meta_sp_list_dump(extlist); 4971 } 4972 4973 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4974 4975 assert(mp->un_numexts >= 1); 4976 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4977 mp->un_ext[mp->un_numexts - 1].un_poff, 4978 (alignment > 0) ? alignment : 4979 meta_sp_get_default_alignment(sp, compnp, ep)); 4980 4981 if (numexts == -1) { 4982 Free(mp); 4983 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4984 } 4985 4986 /* allocate new unit structure and copy in old unit */ 4987 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4988 grow_len, numexts, ep)) == NULL) { 4989 Free(mp); 4990 return (-1); 4991 } 4992 Free(mp); 4993 4994 /* If running in dryrun mode (-n option), we're done here */ 4995 if ((options & MDCMD_DOIT) == 0) { 4996 if (options & MDCMD_PRINT) { 4997 (void) printf(dgettext(TEXT_DOMAIN, 4998 "%s: Soft Partition would grow\n"), 4999 np->cname); 5000 (void) fflush(stdout); 5001 } 5002 return (0); 5003 } 5004 5005 if (getenv(META_SP_DEBUG)) { 5006 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 5007 meta_sp_printunit(new_un); 5008 } 5009 5010 assert(new_un != NULL); 5011 5012 (void) memset(&grow_params, 0, sizeof (grow_params)); 5013 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 5014 grow_params.options = MD_CRO_64BIT; 5015 new_un->c.un_revision |= MD_64BIT_META_DEV; 5016 } else { 5017 grow_params.options = MD_CRO_32BIT; 5018 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 5019 } 5020 grow_params.mnum = MD_SID(new_un); 5021 grow_params.size = new_un->c.un_size; 5022 grow_params.mdp = (uintptr_t)new_un; 5023 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5024 5025 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5026 np->cname) != 0) { 5027 (void) mdstealerror(ep, &grow_params.mde); 5028 return (-1); 5029 } 5030 5031 /* update all watermarks */ 5032 5033 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5034 return (-1); 5035 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5036 return (-1); 5037 5038 5039 /* second phase of commit, set status to MD_SP_OK */ 5040 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5041 return (-1); 5042 5043 meta_invalidate_name(np); 5044 5045 if (options & MDCMD_PRINT) { 5046 (void) printf(dgettext(TEXT_DOMAIN, 5047 "%s: Soft Partition has been grown\n"), 5048 np->cname); 5049 (void) fflush(stdout); 5050 } 5051 5052 return (0); 5053 } 5054 5055 /* 5056 * ************************************************************************** 5057 * Recovery (metarecover) Functions * 5058 * ************************************************************************** 5059 */ 5060 5061 /* 5062 * FUNCTION: meta_recover_sp() 5063 * INPUT: sp - the name of the set we are recovering on 5064 * compnp - name pointer for device we are recovering on 5065 * argc - argument count 5066 * argv - left over arguments not parsed by metarecover command 5067 * options - metarecover options 5068 * OUTPUT: ep - return error pointer 5069 * RETURNS: int - 0 - success, -1 - error 5070 * PURPOSE: parse soft partitioning-specific metarecover options and 5071 * dispatch to the appropriate function to handle recovery. 5072 */ 5073 int 5074 meta_recover_sp( 5075 mdsetname_t *sp, 5076 mdname_t *compnp, 5077 int argc, 5078 char *argv[], 5079 mdcmdopts_t options, 5080 md_error_t *ep 5081 ) 5082 { 5083 md_set_desc *sd; 5084 5085 if (argc > 1) { 5086 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5087 argc, argv); 5088 return (-1); 5089 } 5090 5091 /* 5092 * For a MN set, this operation must be performed on the master 5093 * as it is responsible for maintaining the watermarks 5094 */ 5095 if (!metaislocalset(sp)) { 5096 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5097 return (-1); 5098 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5099 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5100 sd->sd_mn_master_nodenm, NULL, NULL); 5101 return (-1); 5102 } 5103 } 5104 if (argc == 0) { 5105 /* 5106 * if no additional arguments are passed, metarecover should 5107 * validate both on-disk and metadb structures as well as 5108 * checking that both are consistent with each other 5109 */ 5110 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5111 return (-1); 5112 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5113 return (-1); 5114 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5115 return (-1); 5116 } else if (strcmp(argv[0], "-d") == 0) { 5117 /* 5118 * Ensure that there is no existing valid record for this 5119 * soft-partition. If there is we have nothing to do. 5120 */ 5121 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5122 return (-1); 5123 /* validate and recover from on-disk structures */ 5124 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5125 return (-1); 5126 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5127 return (-1); 5128 } else if (strcmp(argv[0], "-m") == 0) { 5129 /* validate and recover from metadb structures */ 5130 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5131 return (-1); 5132 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5133 return (-1); 5134 } else { 5135 /* syntax error */ 5136 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5137 argc, argv); 5138 return (-1); 5139 } 5140 5141 return (0); 5142 } 5143 5144 /* 5145 * FUNCTION: meta_sp_display_exthdr() 5146 * INPUT: none 5147 * OUTPUT: none 5148 * RETURNS: void 5149 * PURPOSE: print header line for sp_ext_node_t information. to be used 5150 * in conjunction with meta_sp_display_ext(). 5151 */ 5152 static void 5153 meta_sp_display_exthdr(void) 5154 { 5155 (void) printf("%20s %5s %7s %20s %20s\n", 5156 dgettext(TEXT_DOMAIN, "Name"), 5157 dgettext(TEXT_DOMAIN, "Seq#"), 5158 dgettext(TEXT_DOMAIN, "Type"), 5159 dgettext(TEXT_DOMAIN, "Offset"), 5160 dgettext(TEXT_DOMAIN, "Length")); 5161 } 5162 5163 5164 /* 5165 * FUNCTION: meta_sp_display_ext() 5166 * INPUT: ext - extent to display 5167 * OUTPUT: none 5168 * RETURNS: void 5169 * PURPOSE: print selected fields from sp_ext_node_t. 5170 */ 5171 static void 5172 meta_sp_display_ext(sp_ext_node_t *ext) 5173 { 5174 /* print extent information */ 5175 if (ext->ext_namep != NULL) 5176 (void) printf("%20s ", ext->ext_namep->cname); 5177 else 5178 (void) printf("%20s ", "NONE"); 5179 5180 (void) printf("%5u ", ext->ext_seq); 5181 5182 switch (ext->ext_type) { 5183 case EXTTYP_ALLOC: 5184 (void) printf("%7s ", "ALLOC"); 5185 break; 5186 case EXTTYP_FREE: 5187 (void) printf("%7s ", "FREE"); 5188 break; 5189 case EXTTYP_RESERVED: 5190 (void) printf("%7s ", "RESV"); 5191 break; 5192 case EXTTYP_END: 5193 (void) printf("%7s ", "END"); 5194 break; 5195 default: 5196 (void) printf("%7s ", "INVLD"); 5197 break; 5198 } 5199 5200 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5201 } 5202 5203 5204 /* 5205 * FUNCTION: meta_sp_checkseq() 5206 * INPUT: extlist - list of extents to be checked 5207 * OUTPUT: none 5208 * RETURNS: int - 0 - success, -1 - error 5209 * PURPOSE: check soft partition sequence numbers. this function assumes 5210 * that a list of extents representing 1 or more soft partitions 5211 * is passed in sorted in sequence number order. within a 5212 * single soft partition, there may not be any missing or 5213 * duplicate sequence numbers. 5214 */ 5215 static int 5216 meta_sp_checkseq(sp_ext_node_t *extlist) 5217 { 5218 sp_ext_node_t *ext; 5219 5220 assert(extlist != NULL); 5221 5222 for (ext = extlist; 5223 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5224 ext = ext->ext_next) { 5225 if (ext->ext_next->ext_namep != NULL && 5226 strcmp(ext->ext_next->ext_namep->cname, 5227 ext->ext_namep->cname) != 0) 5228 continue; 5229 5230 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5231 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5232 "%s: sequence numbers are " 5233 "incorrect: %d should be %d\n"), 5234 ext->ext_next->ext_namep->cname, 5235 ext->ext_next->ext_seq, ext->ext_seq + 1); 5236 return (-1); 5237 } 5238 } 5239 return (0); 5240 } 5241 5242 5243 /* 5244 * FUNCTION: meta_sp_resolve_name_conflict() 5245 * INPUT: sp - name of set we're are recovering in. 5246 * old_np - name pointer of soft partition we found on disk. 5247 * OUTPUT: new_np - name pointer for new soft partition name. 5248 * ep - error pointer returned. 5249 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5250 * PURPOSE: Check to see if the name of one of the soft partitions we found 5251 * on disk already exists in the metadb. If so, prompt for a new 5252 * name. In addition, we keep a static array of names that 5253 * will be recovered from this device since these names don't 5254 * exist in the configuration at this point but cannot be 5255 * recovered more than once. 5256 */ 5257 static int 5258 meta_sp_resolve_name_conflict( 5259 mdsetname_t *sp, 5260 mdname_t *old_np, 5261 mdname_t **new_np, 5262 md_error_t *ep 5263 ) 5264 { 5265 char yesno[255]; 5266 char *yes; 5267 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5268 int nunits; 5269 static int *used_names = NULL; 5270 5271 assert(old_np != NULL); 5272 5273 if (used_names == NULL) { 5274 if ((nunits = meta_get_nunits(ep)) < 0) 5275 return (-1); 5276 used_names = Zalloc(nunits * sizeof (int)); 5277 } 5278 5279 /* see if it exists already */ 5280 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5281 metagetmiscname(old_np, ep) == NULL) { 5282 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5283 return (-1); 5284 else { 5285 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5286 mdclrerror(ep); 5287 return (0); 5288 } 5289 } 5290 5291 /* name exists, ask the user for a new one */ 5292 (void) printf(dgettext(TEXT_DOMAIN, 5293 "WARNING: A soft partition named %s was found in the extent\n" 5294 "headers, but this name already exists in the metadb " 5295 "configuration.\n" 5296 "In order to continue recovery you must supply\n" 5297 "a new name for this soft partition.\n"), old_np->cname); 5298 (void) printf(dgettext(TEXT_DOMAIN, 5299 "Would you like to continue and supply a new name? (yes/no) ")); 5300 5301 (void) fflush(stdout); 5302 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5303 (strlen(yesno) == 1)) 5304 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5305 dgettext(TEXT_DOMAIN, "no")); 5306 yes = dgettext(TEXT_DOMAIN, "yes"); 5307 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5308 return (-1); 5309 } 5310 5311 (void) fflush(stdin); 5312 5313 /* get the new name */ 5314 for (;;) { 5315 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5316 "for this soft partition (dXXXX) ")); 5317 (void) fflush(stdout); 5318 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5319 (void) strcpy(newname, ""); 5320 5321 /* remove newline character */ 5322 if (newname[strlen(newname) - 1] == '\n') 5323 newname[strlen(newname) - 1] = '\0'; 5324 5325 if (!(is_metaname(newname)) || 5326 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5327 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5328 "Invalid metadevice name\n")); 5329 (void) fflush(stderr); 5330 continue; 5331 } 5332 5333 if ((*new_np = metaname(&sp, newname, 5334 META_DEVICE, ep)) == NULL) { 5335 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5336 "Invalid metadevice name\n")); 5337 (void) fflush(stderr); 5338 continue; 5339 } 5340 5341 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5342 /* make sure the name isn't already being used */ 5343 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5344 metagetmiscname(*new_np, ep) != NULL) { 5345 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5346 "That name already exists\n")); 5347 continue; 5348 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5349 return (-1); 5350 5351 break; 5352 } 5353 5354 /* got a new name, place in used array and return */ 5355 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5356 mdclrerror(ep); 5357 return (1); 5358 } 5359 5360 /* 5361 * FUNCTION: meta_sp_validate_wm() 5362 * INPUT: sp - set name we are recovering in 5363 * compnp - name pointer for device we are recovering from 5364 * options - metarecover options 5365 * OUTPUT: ep - error pointer returned 5366 * RETURNS: int - 0 - success, -1 - error 5367 * PURPOSE: validate and display watermark configuration. walk the 5368 * on-disk watermark structures and validate the information 5369 * found within. since a watermark configuration is 5370 * "self-defining", the act of traversing the watermarks 5371 * is part of the validation process. 5372 */ 5373 static int 5374 meta_sp_validate_wm( 5375 mdsetname_t *sp, 5376 mdname_t *compnp, 5377 mdcmdopts_t options, 5378 md_error_t *ep 5379 ) 5380 { 5381 sp_ext_node_t *extlist = NULL; 5382 sp_ext_node_t *ext; 5383 int num_sps = 0; 5384 int rval; 5385 5386 if ((options & MDCMD_VERBOSE) != 0) 5387 (void) printf(dgettext(TEXT_DOMAIN, 5388 "Verifying on-disk structures on %s.\n"), 5389 compnp->cname); 5390 5391 /* 5392 * for each watermark, build an ext_node, place on list. 5393 */ 5394 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5395 meta_sp_cmp_by_nameseq, ep); 5396 5397 if ((options & MDCMD_VERBOSE) != 0) { 5398 /* print out what we found */ 5399 if (extlist == NULL) 5400 (void) printf(dgettext(TEXT_DOMAIN, 5401 "No extent headers found on %s.\n"), 5402 compnp->cname); 5403 else { 5404 (void) printf(dgettext(TEXT_DOMAIN, 5405 "The following extent headers were found on %s.\n"), 5406 compnp->cname); 5407 meta_sp_display_exthdr(); 5408 } 5409 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5410 meta_sp_display_ext(ext); 5411 } 5412 5413 if (rval < 0) { 5414 (void) printf(dgettext(TEXT_DOMAIN, 5415 "%s: On-disk structures invalid or " 5416 "no soft partitions found.\n"), 5417 compnp->cname); 5418 return (-1); 5419 } 5420 5421 assert(extlist != NULL); 5422 5423 /* count number of soft partitions */ 5424 for (ext = extlist; 5425 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5426 ext = ext->ext_next) { 5427 if (ext->ext_next != NULL && 5428 ext->ext_next->ext_namep != NULL && 5429 strcmp(ext->ext_next->ext_namep->cname, 5430 ext->ext_namep->cname) == 0) 5431 continue; 5432 num_sps++; 5433 } 5434 5435 if ((options & MDCMD_VERBOSE) != 0) 5436 (void) printf(dgettext(TEXT_DOMAIN, 5437 "Found %d soft partition(s) on %s.\n"), num_sps, 5438 compnp->cname); 5439 5440 if (num_sps == 0) { 5441 (void) printf(dgettext(TEXT_DOMAIN, 5442 "%s: No soft partitions.\n"), compnp->cname); 5443 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5444 } 5445 5446 /* check sequence numbers */ 5447 if ((options & MDCMD_VERBOSE) != 0) 5448 (void) printf(dgettext(TEXT_DOMAIN, 5449 "Checking sequence numbers.\n")); 5450 5451 if (meta_sp_checkseq(extlist) != 0) 5452 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5453 5454 return (0); 5455 } 5456 5457 /* 5458 * FUNCTION: meta_sp_validate_unit() 5459 * INPUT: sp - name of set we are recovering in 5460 * compnp - name of component we are recovering from 5461 * options - metarecover options 5462 * OUTPUT: ep - error pointer returned 5463 * RETURNS: int - 0 - success, -1 - error 5464 * PURPOSE: validate and display metadb configuration. begin by getting 5465 * all soft partitions built on the specified component. get 5466 * the unit structure for each one and validate the fields within. 5467 */ 5468 static int 5469 meta_sp_validate_unit( 5470 mdsetname_t *sp, 5471 mdname_t *compnp, 5472 mdcmdopts_t options, 5473 md_error_t *ep 5474 ) 5475 { 5476 md_sp_t *msp; 5477 mdnamelist_t *spnlp = NULL; 5478 mdnamelist_t *namep = NULL; 5479 int count; 5480 uint_t extn; 5481 sp_ext_length_t size; 5482 5483 if ((options & MDCMD_VERBOSE) != 0) 5484 (void) printf(dgettext(TEXT_DOMAIN, 5485 "%s: Validating soft partition metadb entries.\n"), 5486 compnp->cname); 5487 5488 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5489 return (-1); 5490 5491 /* get all soft partitions on component */ 5492 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5493 5494 if (count == 0) { 5495 (void) printf(dgettext(TEXT_DOMAIN, 5496 "%s: No soft partitions.\n"), compnp->cname); 5497 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5498 } else if (count < 0) { 5499 return (-1); 5500 } 5501 5502 /* Now go through the soft partitions and check each one */ 5503 for (namep = spnlp; namep != NULL; namep = namep->next) { 5504 mdname_t *curnp = namep->namep; 5505 sp_ext_offset_t curvoff; 5506 5507 /* get the unit structure */ 5508 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5509 return (-1); 5510 5511 /* verify generic unit structure parameters */ 5512 if ((options & MDCMD_VERBOSE) != 0) 5513 (void) printf(dgettext(TEXT_DOMAIN, 5514 "\nVerifying device %s.\n"), 5515 curnp->cname); 5516 5517 /* 5518 * MD_SP_LAST is an invalid state and is always the 5519 * highest numbered. 5520 */ 5521 if (msp->status >= MD_SP_LAST) { 5522 (void) printf(dgettext(TEXT_DOMAIN, 5523 "%s: status value %u is out of range.\n"), 5524 curnp->cname, msp->status); 5525 return (mdmderror(ep, MDE_RECOVER_FAILED, 5526 0, curnp->cname)); 5527 } else if ((options & MDCMD_VERBOSE) != 0) { 5528 uint_t tstate = 0; 5529 5530 if (metaismeta(msp->compnamep)) { 5531 if (meta_get_tstate(msp->common.namep->dev, 5532 &tstate, ep) != 0) 5533 return (-1); 5534 } 5535 (void) printf(dgettext(TEXT_DOMAIN, 5536 "%s: Status \"%s\" is valid.\n"), 5537 curnp->cname, meta_sp_status_to_name(msp->status, 5538 tstate & MD_DEV_ERRORED)); 5539 } 5540 5541 /* Now verify each extent */ 5542 if ((options & MDCMD_VERBOSE) != 0) 5543 (void) printf("%14s %21s %21s %21s\n", 5544 dgettext(TEXT_DOMAIN, "Extent Number"), 5545 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5546 dgettext(TEXT_DOMAIN, "Physical Offset"), 5547 dgettext(TEXT_DOMAIN, "Length")); 5548 5549 curvoff = 0ULL; 5550 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5551 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5552 5553 if ((options & MDCMD_VERBOSE) != 0) 5554 (void) printf("%14u %21llu %21llu %21llu\n", 5555 extn, extp->voff, extp->poff, extp->len); 5556 5557 if (extp->voff != curvoff) { 5558 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5559 "%s: virtual offset for extent %u " 5560 "is inconsistent, expected %llu, " 5561 "got %llu.\n"), curnp->cname, extn, 5562 curvoff, extp->voff); 5563 return (mdmderror(ep, MDE_RECOVER_FAILED, 5564 0, compnp->cname)); 5565 } 5566 5567 /* make sure extent does not drop off the end */ 5568 if ((extp->poff + extp->len) == size) { 5569 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5570 "%s: extent %u at offset %llu, " 5571 "length %llu exceeds the size of the " 5572 "device, %llu.\n"), curnp->cname, 5573 extn, extp->poff, extp->len, size); 5574 return (mdmderror(ep, MDE_RECOVER_FAILED, 5575 0, compnp->cname)); 5576 } 5577 5578 curvoff += extp->len; 5579 } 5580 } 5581 if (options & MDCMD_PRINT) { 5582 (void) printf(dgettext(TEXT_DOMAIN, 5583 "%s: Soft Partition metadb configuration is valid\n"), 5584 compnp->cname); 5585 } 5586 return (0); 5587 } 5588 5589 /* 5590 * FUNCTION: meta_sp_validate_wm_and_unit() 5591 * INPUT: sp - name of set we are recovering in 5592 * compnp - name of device we are recovering from 5593 * options - metarecover options 5594 * OUTPUT: ep - error pointer returned 5595 * RETURNS: int - 0 - success, -1 error 5596 * PURPOSE: cross-validate and display watermarks and metadb records. 5597 * get both the unit structures for the soft partitions built 5598 * on the specified component and the watermarks found on that 5599 * component and check to make sure they are consistent with 5600 * each other. 5601 */ 5602 static int 5603 meta_sp_validate_wm_and_unit( 5604 mdsetname_t *sp, 5605 mdname_t *np, 5606 mdcmdopts_t options, 5607 md_error_t *ep 5608 ) 5609 { 5610 sp_ext_node_t *wmlist = NULL; 5611 sp_ext_node_t *unitlist = NULL; 5612 sp_ext_node_t *unitext; 5613 sp_ext_node_t *wmext; 5614 sp_ext_offset_t tmpunitoff; 5615 mdnamelist_t *spnlp = NULL; 5616 int count; 5617 int rval = 0; 5618 int verbose = (options & MDCMD_VERBOSE); 5619 5620 /* get unit structure list */ 5621 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5622 if (count <= 0) 5623 return (-1); 5624 5625 meta_sp_list_insert(NULL, NULL, &unitlist, 5626 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5627 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5628 5629 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5630 metafreenamelist(spnlp); 5631 return (-1); 5632 } 5633 5634 metafreenamelist(spnlp); 5635 5636 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5637 5638 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5639 meta_sp_cmp_by_offset, ep) < 0) { 5640 meta_sp_list_free(&unitlist); 5641 return (-1); 5642 } 5643 5644 if (getenv(META_SP_DEBUG)) { 5645 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5646 meta_sp_list_dump(unitlist); 5647 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5648 meta_sp_list_dump(wmlist); 5649 } 5650 5651 /* 5652 * step through both lists and compare allocated nodes. Free 5653 * nodes and end watermarks may differ between the two but 5654 * that's generally ok, and if they're wrong will typically 5655 * cause misplaced allocated extents. 5656 */ 5657 if (verbose) 5658 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5659 "allocations match extent headers.\n"), np->cname); 5660 5661 unitext = unitlist; 5662 wmext = wmlist; 5663 while ((wmext != NULL) && (unitext != NULL)) { 5664 /* find next allocated extents in each list */ 5665 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5666 wmext = wmext->ext_next; 5667 5668 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5669 unitext = unitext->ext_next; 5670 5671 if (wmext == NULL || unitext == NULL) 5672 break; 5673 5674 if (verbose) { 5675 (void) printf(dgettext(TEXT_DOMAIN, 5676 "Metadb extent:\n")); 5677 meta_sp_display_exthdr(); 5678 meta_sp_display_ext(unitext); 5679 (void) printf(dgettext(TEXT_DOMAIN, 5680 "Extent header extent:\n")); 5681 meta_sp_display_exthdr(); 5682 meta_sp_display_ext(wmext); 5683 (void) printf("\n"); 5684 } 5685 5686 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5687 rval = -1; 5688 5689 /* 5690 * if the offsets aren't equal, only increment the 5691 * lowest one in hopes of getting the lists back in sync. 5692 */ 5693 tmpunitoff = unitext->ext_offset; 5694 if (unitext->ext_offset <= wmext->ext_offset) 5695 unitext = unitext->ext_next; 5696 if (wmext->ext_offset <= tmpunitoff) 5697 wmext = wmext->ext_next; 5698 } 5699 5700 /* 5701 * if both lists aren't at the end then there are extra 5702 * allocated nodes in one of them. 5703 */ 5704 if (wmext != NULL) { 5705 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5706 "%s: extent headers contain allocations not in " 5707 "the metadb\n\n"), np->cname); 5708 rval = -1; 5709 } 5710 5711 if (unitext != NULL) { 5712 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5713 "%s: metadb contains allocations not in the extent " 5714 "headers\n\n"), np->cname); 5715 rval = -1; 5716 } 5717 5718 if (options & MDCMD_PRINT) { 5719 if (rval == 0) { 5720 (void) printf(dgettext(TEXT_DOMAIN, 5721 "%s: Soft Partition metadb matches extent " 5722 "header configuration\n"), np->cname); 5723 } else { 5724 (void) printf(dgettext(TEXT_DOMAIN, 5725 "%s: Soft Partition metadb does not match extent " 5726 "header configuration\n"), np->cname); 5727 } 5728 } 5729 5730 return (rval); 5731 } 5732 5733 /* 5734 * FUNCTION: meta_sp_validate_exts() 5735 * INPUT: compnp - name pointer for device we are recovering from 5736 * wmext - extent node representing watermark 5737 * unitext - extent node from unit structure 5738 * OUTPUT: ep - return error pointer 5739 * RETURNS: int - 0 - succes, mdmderror return code - error 5740 * PURPOSE: Takes two extent nodes and checks them against each other. 5741 * offset, length, sequence number, set, and name are compared. 5742 */ 5743 static int 5744 meta_sp_validate_exts( 5745 mdname_t *compnp, 5746 sp_ext_node_t *wmext, 5747 sp_ext_node_t *unitext, 5748 md_error_t *ep 5749 ) 5750 { 5751 if (wmext->ext_offset != unitext->ext_offset) { 5752 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5753 "%s: unit structure and extent header offsets differ.\n"), 5754 compnp->cname); 5755 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5756 } 5757 5758 if (wmext->ext_length != unitext->ext_length) { 5759 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5760 "%s: unit structure and extent header lengths differ.\n"), 5761 compnp->cname); 5762 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5763 } 5764 5765 if (wmext->ext_seq != unitext->ext_seq) { 5766 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5767 "%s: unit structure and extent header sequence numbers " 5768 "differ.\n"), compnp->cname); 5769 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5770 } 5771 5772 if (wmext->ext_type != unitext->ext_type) { 5773 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5774 "%s: unit structure and extent header types differ.\n"), 5775 compnp->cname); 5776 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5777 } 5778 5779 /* 5780 * If one has a set pointer and the other doesn't, error. 5781 * If both extents have setnames, then make sure they match 5782 * If both are NULL, it's ok, they match. 5783 */ 5784 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5785 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5786 "%s: unit structure and extent header set values " 5787 "differ.\n"), compnp->cname); 5788 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5789 } 5790 5791 if (unitext->ext_setp != NULL) { 5792 if (strcmp(unitext->ext_setp->setname, 5793 wmext->ext_setp->setname) != 0) { 5794 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5795 "%s: unit structure and extent header set names " 5796 "differ.\n"), compnp->cname); 5797 return (mdmderror(ep, MDE_RECOVER_FAILED, 5798 0, compnp->cname)); 5799 } 5800 } 5801 5802 /* 5803 * If one has a name pointer and the other doesn't, error. 5804 * If both extents have names, then make sure they match 5805 * If both are NULL, it's ok, they match. 5806 */ 5807 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5808 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5809 "%s: unit structure and extent header name values " 5810 "differ.\n"), compnp->cname); 5811 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5812 } 5813 5814 if (unitext->ext_namep != NULL) { 5815 if (strcmp(wmext->ext_namep->cname, 5816 unitext->ext_namep->cname) != 0) { 5817 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5818 "%s: unit structure and extent header names " 5819 "differ.\n"), compnp->cname); 5820 return (mdmderror(ep, MDE_RECOVER_FAILED, 5821 0, compnp->cname)); 5822 } 5823 } 5824 5825 return (0); 5826 } 5827 5828 /* 5829 * FUNCTION: update_sp_status() 5830 * INPUT: sp - name of set we are recovering in 5831 * minors - pointer to an array of soft partition minor numbers 5832 * num_sps - number of minor numbers in array 5833 * status - new status to be applied to all soft parts in array 5834 * mn_set - set if current set is a multi-node set 5835 * OUTPUT: ep - return error pointer 5836 * RETURNS: int - 0 - success, -1 - error 5837 * PURPOSE: update status of soft partitions to new status. minors is an 5838 * array of minor numbers to apply the new status to. 5839 * If mn_set is set, a message is sent to all nodes in the 5840 * cluster to update the status locally. 5841 */ 5842 static int 5843 update_sp_status( 5844 mdsetname_t *sp, 5845 minor_t *minors, 5846 int num_sps, 5847 sp_status_t status, 5848 bool_t mn_set, 5849 md_error_t *ep 5850 ) 5851 { 5852 int i; 5853 int err = 0; 5854 5855 if (mn_set) { 5856 md_mn_msg_sp_setstat_t sp_setstat_params; 5857 int result; 5858 md_mn_result_t *resp = NULL; 5859 5860 for (i = 0; i < num_sps; i++) { 5861 sp_setstat_params.sp_setstat_mnum = minors[i]; 5862 sp_setstat_params.sp_setstat_status = status; 5863 5864 result = mdmn_send_message(sp->setno, 5865 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0, 5866 (char *)&sp_setstat_params, 5867 sizeof (sp_setstat_params), 5868 &resp, ep); 5869 if (resp != NULL) { 5870 if (resp->mmr_exitval != 0) 5871 err = -1; 5872 free_result(resp); 5873 } 5874 if (result != 0) { 5875 err = -1; 5876 } 5877 } 5878 } else { 5879 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5880 err = -1; 5881 } 5882 if (err < 0) { 5883 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5884 "Error updating status on recovered soft " 5885 "partitions.\n")); 5886 } 5887 return (err); 5888 } 5889 5890 /* 5891 * FUNCTION: meta_sp_recover_from_wm() 5892 * INPUT: sp - name of set we are recovering in 5893 * compnp - name pointer for component we are recovering from 5894 * options - metarecover options 5895 * OUTPUT: ep - return error pointer 5896 * RETURNS: int - 0 - success, -1 - error 5897 * PURPOSE: update metadb records to match watermarks. begin by getting 5898 * an extlist representing all soft partitions on the component. 5899 * then build a unit structure for each soft partition. 5900 * notify user of changes, then commit each soft partition to 5901 * the metadb one at a time in the "recovering" state. update 5902 * any watermarks that may need it (to reflect possible name 5903 * changes), and, finally, set the status of all recovered 5904 * partitions to the "OK" state at once. 5905 */ 5906 static int 5907 meta_sp_recover_from_wm( 5908 mdsetname_t *sp, 5909 mdname_t *compnp, 5910 mdcmdopts_t options, 5911 md_error_t *ep 5912 ) 5913 { 5914 sp_ext_node_t *extlist = NULL; 5915 sp_ext_node_t *sp_list = NULL; 5916 sp_ext_node_t *update_list = NULL; 5917 sp_ext_node_t *ext; 5918 sp_ext_node_t *sp_ext; 5919 mp_unit_t *mp; 5920 mp_unit_t **un_array; 5921 int numexts = 0, num_sps = 0, i = 0; 5922 int err = 0; 5923 int not_recovered = 0; 5924 int committed = 0; 5925 sp_ext_length_t sp_length = 0LL; 5926 mdnamelist_t *keynlp = NULL; 5927 mdname_t *np; 5928 mdname_t *new_np; 5929 int new_name; 5930 md_set_params_t set_params; 5931 minor_t *minors = NULL; 5932 char yesno[255]; 5933 char *yes; 5934 bool_t mn_set = 0; 5935 md_set_desc *sd; 5936 mm_unit_t *mm; 5937 md_set_mmown_params_t *ownpar = NULL; 5938 int comp_is_mirror = 0; 5939 5940 /* 5941 * if this component appears in another metadevice already, do 5942 * NOT recover from it. 5943 */ 5944 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5945 return (-1); 5946 5947 /* set flag if dealing with a MN set */ 5948 if (!metaislocalset(sp)) { 5949 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5950 return (-1); 5951 } 5952 if (MD_MNSET_DESC(sd)) 5953 mn_set = 1; 5954 } 5955 /* 5956 * for each watermark, build an ext_node, place on list. 5957 */ 5958 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5959 meta_sp_cmp_by_nameseq, ep) < 0) 5960 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5961 5962 assert(extlist != NULL); 5963 5964 /* count number of soft partitions */ 5965 for (ext = extlist; 5966 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5967 ext = ext->ext_next) { 5968 if (ext->ext_next != NULL && 5969 ext->ext_next->ext_namep != NULL && 5970 strcmp(ext->ext_next->ext_namep->cname, 5971 ext->ext_namep->cname) == 0) 5972 continue; 5973 num_sps++; 5974 } 5975 5976 /* allocate array of unit structure pointers */ 5977 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5978 5979 /* 5980 * build unit structures from list of ext_nodes. 5981 */ 5982 for (ext = extlist; 5983 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5984 ext = ext->ext_next) { 5985 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5986 &sp_list, ext->ext_offset, ext->ext_length, 5987 ext->ext_type, ext->ext_seq, ext->ext_flags, 5988 meta_sp_cmp_by_nameseq); 5989 5990 numexts++; 5991 sp_length += ext->ext_length - MD_SP_WMSIZE; 5992 5993 if (ext->ext_next != NULL && 5994 ext->ext_next->ext_namep != NULL && 5995 strcmp(ext->ext_next->ext_namep->cname, 5996 ext->ext_namep->cname) == 0) 5997 continue; 5998 5999 /* 6000 * if we made it here, we are at a soft partition 6001 * boundary in the list. 6002 */ 6003 if (getenv(META_SP_DEBUG)) { 6004 meta_sp_debug("meta_recover_from_wm: dumping wm " 6005 "list:\n"); 6006 meta_sp_list_dump(sp_list); 6007 } 6008 6009 assert(sp_list != NULL); 6010 assert(sp_list->ext_namep != NULL); 6011 6012 if ((new_name = meta_sp_resolve_name_conflict(sp, 6013 sp_list->ext_namep, &new_np, ep)) < 0) { 6014 err = 1; 6015 goto out; 6016 } else if (new_name) { 6017 for (sp_ext = sp_list; 6018 sp_ext != NULL; 6019 sp_ext = sp_ext->ext_next) { 6020 /* 6021 * insert into the update list for 6022 * watermark update. 6023 */ 6024 meta_sp_list_insert(sp_ext->ext_setp, 6025 new_np, &update_list, sp_ext->ext_offset, 6026 sp_ext->ext_length, sp_ext->ext_type, 6027 sp_ext->ext_seq, EXTFLG_UPDATE, 6028 meta_sp_cmp_by_offset); 6029 } 6030 6031 } 6032 if (options & MDCMD_DOIT) { 6033 /* store name in namespace */ 6034 if (mn_set) { 6035 /* send message to all nodes to return key */ 6036 md_mn_msg_addkeyname_t *send_params; 6037 int result; 6038 md_mn_result_t *resp = NULL; 6039 int message_size; 6040 6041 message_size = sizeof (*send_params) + 6042 strlen(compnp->cname) + 1; 6043 send_params = Zalloc(message_size); 6044 send_params->addkeyname_setno = sp->setno; 6045 (void) strcpy(&send_params->addkeyname_name[0], 6046 compnp->cname); 6047 result = mdmn_send_message(sp->setno, 6048 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6049 0, (char *)send_params, message_size, &resp, 6050 ep); 6051 Free(send_params); 6052 if (resp != NULL) { 6053 if (resp->mmr_exitval >= 0) { 6054 compnp->key = 6055 (mdkey_t)resp->mmr_exitval; 6056 } else { 6057 err = 1; 6058 free_result(resp); 6059 goto out; 6060 } 6061 free_result(resp); 6062 } 6063 if (result != 0) { 6064 err = 1; 6065 goto out; 6066 } 6067 (void) metanamelist_append(&keynlp, compnp); 6068 } else { 6069 if (add_key_name(sp, compnp, &keynlp, 6070 ep) != 0) { 6071 err = 1; 6072 goto out; 6073 } 6074 } 6075 } 6076 6077 /* create the unit structure */ 6078 if ((mp = meta_sp_createunit( 6079 (new_name) ? new_np : sp_list->ext_namep, compnp, 6080 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6081 err = 1; 6082 goto out; 6083 } 6084 6085 if (getenv(META_SP_DEBUG)) { 6086 meta_sp_debug("meta_sp_recover_from_wm: " 6087 "printing newly created unit structure"); 6088 meta_sp_printunit(mp); 6089 } 6090 6091 /* place in unit structure array */ 6092 un_array[i++] = mp; 6093 6094 /* free sp_list */ 6095 meta_sp_list_free(&sp_list); 6096 sp_list = NULL; 6097 numexts = 0; 6098 sp_length = 0LL; 6099 } 6100 6101 /* display configuration updates */ 6102 (void) printf(dgettext(TEXT_DOMAIN, 6103 "The following soft partitions were found and will be added to\n" 6104 "your metadevice configuration.\n")); 6105 (void) printf("%5s %15s %18s\n", 6106 dgettext(TEXT_DOMAIN, "Name"), 6107 dgettext(TEXT_DOMAIN, "Size"), 6108 dgettext(TEXT_DOMAIN, "No. of Extents")); 6109 for (i = 0; i < num_sps; i++) { 6110 (void) printf("%5s%lu %15llu %9d\n", "d", 6111 MD_MIN2UNIT(MD_SID(un_array[i])), 6112 un_array[i]->un_length, un_array[i]->un_numexts); 6113 } 6114 6115 if (!(options & MDCMD_DOIT)) { 6116 not_recovered = 1; 6117 goto out; 6118 } 6119 6120 /* ask user for confirmation */ 6121 (void) printf(dgettext(TEXT_DOMAIN, 6122 "WARNING: You are about to add one or more soft partition\n" 6123 "metadevices to your metadevice configuration. If there\n" 6124 "appears to be an error in the soft partition(s) displayed\n" 6125 "above, do NOT proceed with this recovery operation.\n")); 6126 (void) printf(dgettext(TEXT_DOMAIN, 6127 "Are you sure you want to do this (yes/no)? ")); 6128 6129 (void) fflush(stdout); 6130 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6131 (strlen(yesno) == 1)) 6132 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6133 dgettext(TEXT_DOMAIN, "no")); 6134 yes = dgettext(TEXT_DOMAIN, "yes"); 6135 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6136 not_recovered = 1; 6137 goto out; 6138 } 6139 6140 /* commit records one at a time */ 6141 for (i = 0; i < num_sps; i++) { 6142 (void) memset(&set_params, 0, sizeof (set_params)); 6143 set_params.mnum = MD_SID(un_array[i]); 6144 set_params.size = (un_array[i])->c.un_size; 6145 set_params.mdp = (uintptr_t)(un_array[i]); 6146 set_params.options = 6147 meta_check_devicesize(un_array[i]->un_length); 6148 if (set_params.options == MD_CRO_64BIT) { 6149 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6150 } else { 6151 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6152 } 6153 MD_SETDRIVERNAME(&set_params, MD_SP, 6154 MD_MIN2SET(set_params.mnum)); 6155 6156 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6157 6158 /* 6159 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6160 */ 6161 if (mn_set) { 6162 md_mn_msg_iocset_t send_params; 6163 int result; 6164 md_mn_result_t *resp = NULL; 6165 int mess_size; 6166 6167 /* 6168 * Calculate message size. md_mn_msg_iocset_t only 6169 * contains one extent, so increment the size to 6170 * include all extents 6171 */ 6172 mess_size = sizeof (send_params) - 6173 sizeof (mp_ext_t) + 6174 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6175 6176 send_params.iocset_params = set_params; 6177 (void) memcpy(&send_params.unit, un_array[i], 6178 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6179 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6180 result = mdmn_send_message(sp->setno, 6181 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0, 6182 (char *)&send_params, mess_size, &resp, 6183 ep); 6184 if (resp != NULL) { 6185 if (resp->mmr_exitval != 0) 6186 err = 1; 6187 free_result(resp); 6188 } 6189 if (result != 0) { 6190 err = 1; 6191 } 6192 } else { 6193 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6194 np->cname) != 0) { 6195 err = 1; 6196 } 6197 } 6198 6199 if (err == 1) { 6200 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6201 "%s: Error committing record to metadb.\n"), 6202 np->cname); 6203 goto out; 6204 } 6205 6206 /* note that we've committed a record */ 6207 if (!committed) 6208 committed = 1; 6209 6210 /* update any watermarks that need it */ 6211 if (update_list != NULL) { 6212 md_sp_t *msp; 6213 6214 /* 6215 * Check to see if we're trying to create a partition 6216 * on a mirror. If so we may have to enforce an 6217 * ownership change before writing the watermark out. 6218 */ 6219 if (metaismeta(compnp)) { 6220 char *miscname; 6221 6222 miscname = metagetmiscname(compnp, ep); 6223 if (miscname != NULL) 6224 comp_is_mirror = (strcmp(miscname, 6225 MD_MIRROR) == 0); 6226 else 6227 comp_is_mirror = 0; 6228 } 6229 /* 6230 * If this is a MN set and the component is a mirror, 6231 * change ownership to this node in order to write the 6232 * watermarks 6233 */ 6234 if (mn_set && comp_is_mirror) { 6235 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6236 if (mm == NULL) { 6237 err = 1; 6238 goto out; 6239 } else { 6240 err = meta_mn_change_owner(&ownpar, 6241 sp->setno, 6242 meta_getminor(compnp->dev), 6243 sd->sd_mn_mynode->nd_nodeid, 6244 MD_MN_MM_PREVENT_CHANGE | 6245 MD_MN_MM_SPAWN_THREAD); 6246 if (err != 0) 6247 goto out; 6248 } 6249 } 6250 6251 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6252 err = 1; 6253 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6254 "%s: Error updating extent headers.\n"), 6255 np->cname); 6256 goto out; 6257 } 6258 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6259 err = 1; 6260 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6261 "%s: Error updating extent headers " 6262 "on disk.\n"), np->cname); 6263 goto out; 6264 } 6265 } 6266 /* 6267 * If we have changed ownership earlier and prevented any 6268 * ownership changes, we can now allow ownership changes 6269 * again. 6270 */ 6271 if (ownpar) { 6272 (void) meta_mn_change_owner(&ownpar, sp->setno, 6273 ownpar->d.mnum, 6274 ownpar->d.owner, 6275 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6276 } 6277 } 6278 6279 /* update status of all soft partitions to OK */ 6280 minors = Zalloc(num_sps * sizeof (minor_t)); 6281 for (i = 0; i < num_sps; i++) 6282 minors[i] = MD_SID(un_array[i]); 6283 6284 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6285 if (err != 0) 6286 goto out; 6287 6288 if (options & MDCMD_PRINT) 6289 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6290 "Soft Partitions recovered from device.\n"), 6291 compnp->cname); 6292 out: 6293 /* free memory */ 6294 if (extlist != NULL) 6295 meta_sp_list_free(&extlist); 6296 if (sp_list != NULL) 6297 meta_sp_list_free(&sp_list); 6298 if (update_list != NULL) 6299 meta_sp_list_free(&update_list); 6300 if (un_array != NULL) { 6301 for (i = 0; i < num_sps; i++) 6302 Free(un_array[i]); 6303 Free(un_array); 6304 } 6305 if (minors != NULL) 6306 Free(minors); 6307 if (ownpar != NULL) 6308 Free(ownpar); 6309 (void) fflush(stdout); 6310 6311 if ((keynlp != NULL) && (committed != 1)) { 6312 /* 6313 * if we haven't committed any softparts, either because of an 6314 * error or because the user decided not to proceed, delete 6315 * namelist key for the component 6316 */ 6317 if (mn_set) { 6318 mdnamelist_t *p; 6319 6320 for (p = keynlp; (p != NULL); p = p->next) { 6321 mdname_t *np = p->namep; 6322 md_mn_msg_delkeyname_t send_params; 6323 md_mn_result_t *resp = NULL; 6324 6325 send_params.delkeyname_dev = np->dev; 6326 send_params.delkeyname_setno = sp->setno; 6327 send_params.delkeyname_key = np->key; 6328 (void) mdmn_send_message(sp->setno, 6329 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6330 0, (char *)&send_params, 6331 sizeof (send_params), 6332 &resp, ep); 6333 if (resp != NULL) { 6334 free_result(resp); 6335 } 6336 } 6337 } else { 6338 (void) del_key_names(sp, keynlp, NULL); 6339 } 6340 } 6341 6342 metafreenamelist(keynlp); 6343 6344 if (err) 6345 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6346 6347 if (not_recovered) 6348 if (options & MDCMD_PRINT) 6349 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6350 "Soft Partitions NOT recovered from device.\n"), 6351 compnp->cname); 6352 return (0); 6353 } 6354 6355 /* 6356 * FUNCTION: meta_sp_recover_from_unit() 6357 * INPUT: sp - name of set we are recovering in 6358 * compnp - name of component we are recovering from 6359 * options - metarecover options 6360 * OUTPUT: ep - return error pointer 6361 * RETURNS: int - 0 - success, -1 - error 6362 * PURPOSE: update watermarks to match metadb records. begin by getting 6363 * a namelist representing all soft partitions on the specified 6364 * component. then, build an extlist representing the soft 6365 * partitions, filling in the freespace extents. notify user 6366 * of changes, place all soft partitions into the "recovering" 6367 * state and update the watermarks. finally, return all soft 6368 * partitions to the "OK" state. 6369 */ 6370 static int 6371 meta_sp_recover_from_unit( 6372 mdsetname_t *sp, 6373 mdname_t *compnp, 6374 mdcmdopts_t options, 6375 md_error_t *ep 6376 ) 6377 { 6378 mdnamelist_t *spnlp = NULL; 6379 mdnamelist_t *nlp = NULL; 6380 sp_ext_node_t *ext = NULL; 6381 sp_ext_node_t *extlist = NULL; 6382 int count; 6383 char yesno[255]; 6384 char *yes; 6385 int rval = 0; 6386 minor_t *minors = NULL; 6387 int i; 6388 md_sp_t *msp; 6389 md_set_desc *sd; 6390 bool_t mn_set = 0; 6391 daddr_t start_block; 6392 6393 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6394 if (count <= 0) 6395 return (-1); 6396 6397 /* set flag if dealing with a MN set */ 6398 if (!metaislocalset(sp)) { 6399 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6400 return (-1); 6401 } 6402 if (MD_MNSET_DESC(sd)) 6403 mn_set = 1; 6404 } 6405 /* 6406 * Save the XDR unit structure for one of the soft partitions; 6407 * we'll use this later to provide metadevice context to 6408 * update the watermarks so the device can be resolved by 6409 * devid instead of dev_t. 6410 */ 6411 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6412 metafreenamelist(spnlp); 6413 return (-1); 6414 } 6415 6416 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6417 MD_DISKADDR_ERROR) { 6418 return (-1); 6419 } 6420 6421 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6422 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6423 meta_sp_list_insert(NULL, NULL, &extlist, 6424 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6425 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6426 6427 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6428 metafreenamelist(spnlp); 6429 return (-1); 6430 } 6431 6432 assert(extlist != NULL); 6433 if ((options & MDCMD_VERBOSE) != 0) { 6434 (void) printf(dgettext(TEXT_DOMAIN, 6435 "Updating extent headers on device %s from metadb.\n\n"), 6436 compnp->cname); 6437 (void) printf(dgettext(TEXT_DOMAIN, 6438 "The following extent headers will be written:\n")); 6439 meta_sp_display_exthdr(); 6440 } 6441 6442 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6443 6444 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6445 6446 /* mark every node for updating except the reserved space */ 6447 if (ext->ext_type != EXTTYP_RESERVED) { 6448 ext->ext_flags |= EXTFLG_UPDATE; 6449 6450 /* print extent information */ 6451 if ((options & MDCMD_VERBOSE) != 0) 6452 meta_sp_display_ext(ext); 6453 } 6454 } 6455 6456 /* request verification and then update all watermarks */ 6457 if ((options & MDCMD_DOIT) != 0) { 6458 6459 (void) printf(dgettext(TEXT_DOMAIN, 6460 "\nWARNING: You are about to overwrite portions of %s\n" 6461 "with soft partition metadata. The extent headers will be\n" 6462 "written to match the existing metadb configuration. If\n" 6463 "the device was not previously setup with this\n" 6464 "configuration, data loss may result.\n\n"), 6465 compnp->cname); 6466 (void) printf(dgettext(TEXT_DOMAIN, 6467 "Are you sure you want to do this (yes/no)? ")); 6468 6469 (void) fflush(stdout); 6470 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6471 (strlen(yesno) == 1)) 6472 (void) snprintf(yesno, sizeof (yesno), 6473 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6474 yes = dgettext(TEXT_DOMAIN, "yes"); 6475 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6476 /* place soft partitions into recovering state */ 6477 minors = Zalloc(count * sizeof (minor_t)); 6478 for (nlp = spnlp, i = 0; 6479 nlp != NULL && i < count; 6480 nlp = nlp->next, i++) { 6481 assert(nlp->namep != NULL); 6482 minors[i] = meta_getminor(nlp->namep->dev); 6483 } 6484 if (update_sp_status(sp, minors, count, 6485 MD_SP_RECOVER, mn_set, ep) != 0) { 6486 rval = -1; 6487 goto out; 6488 } 6489 6490 /* update the watermarks */ 6491 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6492 rval = -1; 6493 goto out; 6494 } 6495 6496 if (options & MDCMD_PRINT) { 6497 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6498 "Soft Partitions recovered from metadb\n"), 6499 compnp->cname); 6500 } 6501 6502 /* return soft partitions to the OK state */ 6503 if (update_sp_status(sp, minors, count, 6504 MD_SP_OK, mn_set, ep) != 0) { 6505 rval = -1; 6506 goto out; 6507 } 6508 6509 rval = 0; 6510 goto out; 6511 } 6512 } 6513 6514 if (options & MDCMD_PRINT) { 6515 (void) printf(dgettext(TEXT_DOMAIN, 6516 "%s: Soft Partitions NOT recovered from metadb\n"), 6517 compnp->cname); 6518 } 6519 6520 out: 6521 if (minors != NULL) 6522 Free(minors); 6523 metafreenamelist(spnlp); 6524 meta_sp_list_free(&extlist); 6525 (void) fflush(stdout); 6526 return (rval); 6527 } 6528 6529 6530 /* 6531 * FUNCTION: meta_sp_update_abr() 6532 * INPUT: sp - name of set we are recovering in 6533 * OUTPUT: ep - return error pointer 6534 * RETURNS: int - 0 - success, -1 - error 6535 * PURPOSE: update the ABR state for all soft partitions in the set. This 6536 * is called when joining a set. It sends a message to the master 6537 * node for each soft partition to get the value of tstate and 6538 * then sets ABR ,if required, by opening the sp, setting ABR 6539 * and then closing the sp. This approach is taken rather that 6540 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6541 * the case when we have another node simultaneously unsetting ABR. 6542 */ 6543 int 6544 meta_sp_update_abr( 6545 mdsetname_t *sp, 6546 md_error_t *ep 6547 ) 6548 { 6549 mdnamelist_t *devnlp = NULL; 6550 mdnamelist_t *p; 6551 mdname_t *devnp = NULL; 6552 md_unit_t *un; 6553 char fname[MAXPATHLEN]; 6554 int mnum, fd; 6555 volcap_t vc; 6556 uint_t tstate; 6557 6558 6559 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6560 return (-1); 6561 } 6562 6563 /* Exit if no soft partitions in this set */ 6564 if (devnlp == NULL) 6565 return (0); 6566 6567 /* For each soft partition */ 6568 for (p = devnlp; (p != NULL); p = p->next) { 6569 devnp = p->namep; 6570 6571 /* check if this is a top level metadevice */ 6572 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6573 goto out; 6574 if (MD_HAS_PARENT(MD_PARENT(un))) { 6575 Free(un); 6576 continue; 6577 } 6578 Free(un); 6579 6580 /* Get tstate from Master */ 6581 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6582 mdname_t *np; 6583 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6584 ep); 6585 if (np) { 6586 md_perror(dgettext(TEXT_DOMAIN, 6587 "Unable to get tstate for %s"), np->cname); 6588 } 6589 continue; 6590 } 6591 /* If not set on the master, nothing to do */ 6592 if (!(tstate & MD_ABR_CAP)) 6593 continue; 6594 6595 mnum = meta_getminor(devnp->dev); 6596 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6597 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6598 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6599 md_perror(dgettext(TEXT_DOMAIN, 6600 "Could not open device %s"), fname); 6601 continue; 6602 } 6603 6604 /* Set ABR state */ 6605 vc.vc_info = 0; 6606 vc.vc_set = 0; 6607 if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) { 6608 (void) close(fd); 6609 continue; 6610 } 6611 6612 vc.vc_set = DKV_ABR_CAP; 6613 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6614 (void) close(fd); 6615 goto out; 6616 } 6617 6618 (void) close(fd); 6619 } 6620 metafreenamelist(devnlp); 6621 return (0); 6622 out: 6623 metafreenamelist(devnlp); 6624 return (-1); 6625 } 6626 6627 /* 6628 * FUNCTION: meta_mn_sp_update_abr() 6629 * INPUT: arg - Given set. 6630 * PURPOSE: update the ABR state for all soft partitions in the set by 6631 * forking a process to call meta_sp_update_abr() 6632 * This function is only called via rpc.metad when adding a node 6633 * to a set, ie this node is beong joined to the set by another 6634 * node. 6635 */ 6636 void * 6637 meta_mn_sp_update_abr(void *arg) 6638 { 6639 set_t setno = *((set_t *)arg); 6640 mdsetname_t *sp; 6641 md_error_t mde = mdnullerror; 6642 int fval; 6643 6644 /* should have a set */ 6645 assert(setno != NULL); 6646 6647 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6648 mde_perror(&mde, ""); 6649 return (NULL); 6650 } 6651 6652 if (!(meta_is_mn_set(sp, &mde))) { 6653 mde_perror(&mde, ""); 6654 return (NULL); 6655 } 6656 6657 /* fork a process */ 6658 if ((fval = md_daemonize(sp, &mde)) != 0) { 6659 /* 6660 * md_daemonize will fork off a process. The is the 6661 * parent or error. 6662 */ 6663 if (fval > 0) { 6664 return (NULL); 6665 } 6666 mde_perror(&mde, ""); 6667 return (NULL); 6668 } 6669 /* 6670 * Child process should never return back to rpc.metad, but 6671 * should exit. 6672 * Flush all internally cached data inherited from parent process 6673 * since cached data will be cleared when parent process RPC request 6674 * has completed (which is possibly before this child process 6675 * can complete). 6676 * Child process can retrieve and cache its own copy of data from 6677 * rpc.metad that won't be changed by the parent process. 6678 * 6679 * Reset md_in_daemon since this child will be a client of rpc.metad 6680 * not part of the rpc.metad daemon itself. 6681 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6682 * this thread is rpc.metad or any other thread. (If this thread 6683 * was rpc.metad it could use some short circuit code to get data 6684 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6685 */ 6686 md_in_daemon = 0; 6687 metaflushsetname(sp); 6688 sr_cache_flush_setno(setno); 6689 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6690 mde_perror(&mde, ""); 6691 md_exit(sp, 1); 6692 } 6693 6694 6695 /* 6696 * Closing stdin/out/err here. 6697 */ 6698 (void) close(0); 6699 (void) close(1); 6700 (void) close(2); 6701 assert(fval == 0); 6702 6703 (void) meta_sp_update_abr(sp, &mde); 6704 6705 md_exit(sp, 0); 6706 /*NOTREACHED*/ 6707 return (NULL); 6708 } 6709 6710 int 6711 meta_sp_check_component( 6712 mdsetname_t *sp, 6713 mdname_t *np, 6714 md_error_t *ep 6715 ) 6716 { 6717 md_sp_t *msp; 6718 minor_t mnum = 0; 6719 md_dev64_t dev = 0; 6720 mdnm_params_t nm; 6721 md_getdevs_params_t mgd; 6722 side_t sideno; 6723 char *miscname; 6724 md_dev64_t *mydev = NULL; 6725 char *pname = NULL, *t; 6726 char *ctd_name = NULL; 6727 char *devname = NULL; 6728 int len; 6729 int rval = -1; 6730 6731 (void) memset(&nm, '\0', sizeof (nm)); 6732 if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL) 6733 return (-1); 6734 6735 if ((miscname = metagetmiscname(np, ep)) == NULL) 6736 return (-1); 6737 6738 sideno = getmyside(sp, ep); 6739 6740 meta_sp_debug("meta_sp_check_component: %s is on %s key: %d" 6741 " dev: %llu\n", 6742 np->cname, msp->compnamep->cname, msp->compnamep->key, 6743 msp->compnamep->dev); 6744 6745 /* 6746 * Now get the data from the unit structure. The compnamep stuff 6747 * contains the data from the namespace and we need the un_dev 6748 * from the unit structure. 6749 */ 6750 (void) memset(&mgd, '\0', sizeof (mgd)); 6751 MD_SETDRIVERNAME(&mgd, miscname, sp->setno); 6752 mgd.cnt = 1; /* sp's only have one subdevice */ 6753 mgd.mnum = meta_getminor(np->dev); 6754 6755 mydev = Zalloc(sizeof (*mydev)); 6756 mgd.devs = (uintptr_t)mydev; 6757 6758 if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) { 6759 meta_sp_debug("meta_sp_check_component: ioctl failed\n"); 6760 (void) mdstealerror(ep, &mgd.mde); 6761 rval = 0; 6762 goto out; 6763 } else if (mgd.cnt <= 0) { 6764 assert(mgd.cnt >= 0); 6765 rval = 0; 6766 goto out; 6767 } 6768 6769 /* Get the devname from the name space. */ 6770 if ((devname = meta_getnmentbykey(sp->setno, sideno, 6771 msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) { 6772 meta_sp_debug("meta_sp_check_component: key %d not" 6773 "found\n", msp->compnamep->key); 6774 goto out; 6775 } 6776 6777 meta_sp_debug("dev %s from component: (%lu, %lu)\n", 6778 devname, 6779 meta_getmajor(*mydev), 6780 meta_getminor(*mydev)); 6781 meta_sp_debug("minor from the namespace: %lu\n", mnum); 6782 6783 if (mnum != meta_getminor(*mydev)) { 6784 /* 6785 * The minor numbers are different. Update the namespace 6786 * with the information from the component. 6787 */ 6788 6789 t = strrchr(devname, '/'); 6790 t++; 6791 ctd_name = Strdup(t); 6792 6793 meta_sp_debug("meta_sp_check_component: ctd_name: %s\n", 6794 ctd_name); 6795 6796 len = strlen(devname); 6797 t = strrchr(devname, '/'); 6798 t++; 6799 pname = Zalloc((len - strlen(t)) + 1); 6800 (void) strncpy(pname, devname, (len - strlen(t))); 6801 meta_sp_debug("pathname: %s\n", pname); 6802 6803 meta_sp_debug("updating the minor number to %lu\n", nm.mnum); 6804 6805 if (meta_update_namespace(sp->setno, sideno, 6806 ctd_name, *mydev, msp->compnamep->key, pname, 6807 ep) != 0) { 6808 goto out; 6809 } 6810 } 6811 out: 6812 if (pname != NULL) 6813 Free(pname); 6814 if (ctd_name != NULL) 6815 Free(ctd_name); 6816 if (devname != NULL) 6817 Free(devname); 6818 if (mydev != NULL) 6819 Free(mydev); 6820 return (rval); 6821 } 6822