1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Just in case we're not in a build environment, make sure that 30 * TEXT_DOMAIN gets set to something. 31 */ 32 #if !defined(TEXT_DOMAIN) 33 #define TEXT_DOMAIN "SYS_TEST" 34 #endif 35 36 /* 37 * soft partition operations 38 * 39 * Soft Partitions provide a virtual disk mechanism which is used to 40 * divide a large volume into many small pieces, each appearing as a 41 * separate device. A soft partition consists of a series of extents, 42 * each having an offset and a length. The extents are logically 43 * contiguous, so where the first extent leaves off the second extent 44 * picks up. Which extent a given "virtual offset" belongs to is 45 * dependent on the size of all the previous extents in the soft 46 * partition. 47 * 48 * Soft partitions are represented in memory by an extent node 49 * (sp_ext_node_t) which contains all of the information necessary to 50 * create a unit structure and update the on-disk format, called 51 * "watermarks". These extent nodes are typically kept in a doubly 52 * linked list and are manipulated by list manipulation routines. A 53 * list of extents may represent all of the soft partitions on a volume, 54 * a single soft partition, or perhaps just a set of extents that need 55 * to be updated. Extent lists may be sorted by extent or by name/seq#, 56 * depending on which compare function is used. Most of the routines 57 * require the list be sorted by offset to work, and that's the typical 58 * configuration. 59 * 60 * In order to do an allocation, knowledge of all soft partitions on the 61 * volume is required. Then free space is determined from the space 62 * that is not allocated, and new allocations can be made from the free 63 * space. Once the new allocations are made, a unit structure is created 64 * and the watermarks are updated. The status is then changed to "okay" 65 * on the unit structure to commit the transaction. If updating the 66 * watermarks fails, the unit structure is in an intermediate state and 67 * the driver will not allow access to the device. 68 * 69 * A typical sequence of events is: 70 * 1. Fetch the list of names for all soft partitions on a volume 71 * meta_sp_get_by_component() 72 * 2. Construct an extent list from the name list 73 * meta_sp_extlist_from_namelist() 74 * 3. Fill the gaps in the extent list with free extents 75 * meta_sp_list_freefill() 76 * 4. Allocate from the free extents 77 * meta_sp_alloc_by_len() 78 * meta_sp_alloc_by_list() 79 * 5. Create the unit structure from the extent list 80 * meta_sp_createunit() 81 * meta_sp_updateunit() 82 * 6. Write out the watermarks 83 * meta_sp_update_wm() 84 * 7. Set the status to "Okay" 85 * meta_sp_setstatus() 86 * 87 */ 88 89 #include <stdio.h> 90 #include <meta.h> 91 #include "meta_repartition.h" 92 #include <sys/lvm/md_sp.h> 93 #include <sys/lvm/md_crc.h> 94 #include <strings.h> 95 #include <sys/lvm/md_mirror.h> 96 #include <sys/bitmap.h> 97 98 extern int md_in_daemon; 99 100 typedef struct sp_ext_node { 101 struct sp_ext_node *ext_next; /* next element */ 102 struct sp_ext_node *ext_prev; /* previous element */ 103 sp_ext_type_t ext_type; /* type of extent */ 104 sp_ext_offset_t ext_offset; /* starting offset */ 105 sp_ext_length_t ext_length; /* length of this node */ 106 uint_t ext_flags; /* extent flags */ 107 uint32_t ext_seq; /* watermark seq no */ 108 mdname_t *ext_namep; /* name pointer */ 109 mdsetname_t *ext_setp; /* set pointer */ 110 } sp_ext_node_t; 111 112 /* extent flags */ 113 #define EXTFLG_UPDATE (1) 114 115 /* Extent node compare function for list sorting */ 116 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 117 118 119 /* Function Prototypes */ 120 121 /* Debugging Functions */ 122 static void meta_sp_debug(char *format, ...); 123 static void meta_sp_printunit(mp_unit_t *mp); 124 125 /* Misc Support Functions */ 126 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 127 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 128 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 129 md_error_t *ep); 130 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 131 mdnamelist_t **nlpp, int force, md_error_t *ep); 132 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 133 mdname_t *compnp, md_error_t *ep); 134 135 /* Extent List Manipulation Functions */ 136 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 137 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 138 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 139 sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length, 140 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 141 static void meta_sp_list_free(sp_ext_node_t **head); 142 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 143 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 144 sp_ext_type_t exttype, int exclude_wm); 145 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 146 sp_ext_offset_t offset); 147 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 148 sp_ext_length_t size); 149 static void meta_sp_list_dump(sp_ext_node_t *head); 150 static int meta_sp_list_overlaps(sp_ext_node_t *head); 151 152 /* Extent List Query Functions */ 153 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 154 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 155 sp_ext_length_t alignment); 156 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 157 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 158 md_error_t *ep); 159 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 160 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 161 162 163 /* Extent Allocation Functions */ 164 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 165 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 166 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 167 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 168 sp_ext_node_t **extlist, sp_ext_length_t *lp, 169 sp_ext_offset_t last_off, sp_ext_length_t alignment); 170 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 171 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 172 173 /* Extent List Population Functions */ 174 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 175 sp_ext_node_t **extlist, md_error_t *ep); 176 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 177 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep); 178 179 /* Print (metastat) Functions */ 180 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 181 mdprtopts_t options, md_error_t *ep); 182 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 183 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 184 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 185 186 /* Watermark Manipulation Functions */ 187 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 188 sp_ext_node_t *extlist, md_error_t *ep); 189 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 190 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 191 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 192 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 193 md_error_t *ep); 194 195 /* Unit Structure Manipulation Functions */ 196 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 197 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 198 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 199 sp_status_t status, md_error_t *ep); 200 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 201 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 202 md_error_t *ep); 203 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 204 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 205 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 206 int *repart_options, md_error_t *ep); 207 208 /* Reset (metaclear) Functions */ 209 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 210 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 211 212 /* Recovery (metarecover) Functions */ 213 static void meta_sp_display_exthdr(void); 214 static void meta_sp_display_ext(sp_ext_node_t *ext); 215 static int meta_sp_checkseq(sp_ext_node_t *extlist); 216 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 217 mdname_t **, md_error_t *); 218 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 219 mdcmdopts_t options, md_error_t *ep); 220 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 221 mdcmdopts_t options, md_error_t *ep); 222 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 223 mdcmdopts_t options, md_error_t *ep); 224 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 225 sp_ext_node_t *unitext, md_error_t *ep); 226 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 227 mdcmdopts_t options, md_error_t *ep); 228 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 229 mdcmdopts_t options, md_error_t *ep); 230 231 /* 232 * Private Constants 233 */ 234 235 static const int FORCE_RELOAD_CACHE = 1; 236 static const uint_t NO_FLAGS = 0; 237 static const sp_ext_offset_t NO_OFFSET = 0ULL; 238 static const uint_t NO_SEQUENCE_NUMBER = 0; 239 static const int ONE_SOFT_PARTITION = 1; 240 241 static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)]; 242 243 #define TEST_SOFT_PARTITION_NAMEP NULL 244 #define TEST_SETNAMEP NULL 245 246 #define EXCLUDE_WM (1) 247 #define INCLUDE_WM (0) 248 249 #define SP_UNALIGNED (0LL) 250 251 /* 252 * ************************************************************************** 253 * Debugging Functions * 254 * ************************************************************************** 255 */ 256 257 /*PRINTFLIKE1*/ 258 static void 259 meta_sp_debug(char *format, ...) 260 { 261 static int debug; 262 static int debug_set = 0; 263 va_list ap; 264 265 if (!debug_set) { 266 debug = getenv(META_SP_DEBUG) ? 1 : 0; 267 debug_set = 1; 268 } 269 270 if (debug) { 271 va_start(ap, format); 272 (void) vfprintf(stderr, format, ap); 273 va_end(ap); 274 } 275 } 276 277 static void 278 meta_sp_printunit(mp_unit_t *mp) 279 { 280 int i; 281 282 if (mp == NULL) 283 return; 284 285 /* print the common fields we know about */ 286 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 287 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 288 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 289 290 /* sp-specific fields */ 291 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 292 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 293 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 294 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 295 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 296 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 297 298 /* print extent information */ 299 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 300 for (i = 0; i < mp->un_numexts; i++) { 301 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 302 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 303 mp->un_ext[i].un_len); 304 } 305 } 306 307 /* 308 * FUNCTION: meta_sp_parsesize() 309 * INPUT: s - the string to parse 310 * OUTPUT: *szp - disk block count (0 for "all") 311 * RETURNS: -1 for error, 0 for success 312 * PURPOSE: parses the command line parameter that specifies the 313 * requested size of a soft partition. The input string 314 * is either the literal "all" or a numeric value 315 * followed by a single character, b for disk blocks, k 316 * for kilobytes, m for megabytes, g for gigabytes, or t 317 * for terabytes. p for petabytes and e for exabytes 318 * have been added as undocumented features for future 319 * expansion. For example, 100m is 100 megabytes, while 320 * 50g is 50 gigabytes. All values are rounded up to the 321 * nearest block size. 322 */ 323 int 324 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 325 { 326 if (s == NULL || szp == NULL) { 327 return (-1); 328 } 329 330 /* Check for literal "all" */ 331 if (strcasecmp(s, "all") == 0) { 332 *szp = 0; 333 return (0); 334 } 335 336 return (meta_sp_parsesizestring(s, szp)); 337 } 338 339 /* 340 * FUNCTION: meta_sp_parsesizestring() 341 * INPUT: s - the string to parse 342 * OUTPUT: *szp - disk block count 343 * RETURNS: -1 for error, 0 for success 344 * PURPOSE: parses a string that specifies size. The input string is a 345 * numeric value followed by a single character, b for disk blocks, 346 * k for kilobytes, m for megabytes, g for gigabytes, or t for 347 * terabytes. p for petabytes and e for exabytes have been added 348 * as undocumented features for future expansion. For example, 349 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 350 * are rounded up to the nearest block size. 351 */ 352 static int 353 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 354 { 355 sp_ext_length_t len = 0; 356 char len_type[2]; 357 358 if (s == NULL || szp == NULL) { 359 return (-1); 360 } 361 362 /* 363 * make sure block offset does not overflow 2^64 bytes. 364 */ 365 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 366 (len == 0LL) || 367 (len > (1LL << (64 - DEV_BSHIFT)))) 368 return (-1); 369 370 switch (len_type[0]) { 371 case 'B': 372 case 'b': 373 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 374 break; 375 case 'K': 376 case 'k': 377 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 378 break; 379 case 'M': 380 case 'm': 381 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 382 break; 383 case 'g': 384 case 'G': 385 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 386 break; 387 case 't': 388 case 'T': 389 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 390 DEV_BSIZE)); 391 break; 392 case 'p': 393 case 'P': 394 len = lbtodb(roundup( 395 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 396 DEV_BSIZE)); 397 break; 398 case 'e': 399 case 'E': 400 len = lbtodb(roundup( 401 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 402 DEV_BSIZE)); 403 break; 404 default: 405 /* error */ 406 return (-1); 407 } 408 409 *szp = len; 410 return (0); 411 } 412 413 /* 414 * FUNCTION: meta_sp_setgeom() 415 * INPUT: np - the underlying device to setup geometry for 416 * compnp - the underlying device to setup geometry for 417 * mp - the unit structure to set the geometry for 418 * OUTPUT: ep - return error pointer 419 * RETURNS: int - -1 if error, 0 otherwise 420 * PURPOSE: establishes geometry information for a device 421 */ 422 static int 423 meta_sp_setgeom( 424 mdname_t *np, 425 mdname_t *compnp, 426 mp_unit_t *mp, 427 md_error_t *ep 428 ) 429 { 430 mdgeom_t *geomp; 431 uint_t round_cyl = 0; 432 433 if ((geomp = metagetgeom(compnp, ep)) == NULL) 434 return (-1); 435 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 436 geomp->read_reinstruct, round_cyl, ep) != 0) 437 return (-1); 438 439 return (0); 440 } 441 442 /* 443 * FUNCTION: meta_sp_setstatus() 444 * INPUT: sp - the set name for the devices to set the status on 445 * minors - an array of minor numbers of devices to set status on 446 * num_units - number of entries in the array 447 * status - status value to set all units to 448 * OUTPUT: ep - return error pointer 449 * RETURNS: int - -1 if error, 0 success 450 * PURPOSE: sets the status of one or more soft partitions to the 451 * requested value 452 */ 453 int 454 meta_sp_setstatus( 455 mdsetname_t *sp, 456 minor_t *minors, 457 int num_units, 458 sp_status_t status, 459 md_error_t *ep 460 ) 461 { 462 md_sp_statusset_t status_params; 463 464 assert(minors != NULL); 465 466 /* update status of all soft partitions to the status passed in */ 467 (void) memset(&status_params, 0, sizeof (status_params)); 468 status_params.num_units = num_units; 469 status_params.new_status = status; 470 status_params.size = num_units * sizeof (minor_t); 471 status_params.minors = (uintptr_t)minors; 472 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 473 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 474 NULL) != 0) { 475 (void) mdstealerror(ep, &status_params.mde); 476 return (-1); 477 } 478 return (0); 479 } 480 481 /* 482 * FUNCTION: meta_get_sp_names() 483 * INPUT: sp - the set name to get soft partitions from 484 * options - options from the command line 485 * OUTPUT: nlpp - list of all soft partition names 486 * ep - return error pointer 487 * RETURNS: int - -1 if error, 0 success 488 * PURPOSE: returns a list of all soft partitions in the metadb 489 * for all devices in the specified set 490 */ 491 int 492 meta_get_sp_names( 493 mdsetname_t *sp, 494 mdnamelist_t **nlpp, 495 int options, 496 md_error_t *ep 497 ) 498 { 499 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 500 } 501 502 /* 503 * FUNCTION: meta_get_by_component() 504 * INPUT: sp - the set name to get soft partitions from 505 * compnp - the name of the device containing the soft 506 * partitions that will be returned 507 * force - 0 - reads cached namelist if available, 508 * 1 - reloads cached namelist, frees old namelist 509 * OUTPUT: nlpp - list of all soft partition names 510 * ep - return error pointer 511 * RETURNS: int - -1 error, otherwise the number of soft partitions 512 * found on the component (0 = none found). 513 * PURPOSE: returns a list of all soft partitions on a given device 514 * from the metadb information 515 */ 516 static int 517 meta_sp_get_by_component( 518 mdsetname_t *sp, 519 mdname_t *compnp, 520 mdnamelist_t **nlpp, 521 int force, 522 md_error_t *ep 523 ) 524 { 525 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 526 static int cached_count = 0; /* cached count */ 527 mdnamelist_t *spnlp = NULL; /* all sp names */ 528 mdnamelist_t *namep; /* list iterator */ 529 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 530 mdnamelist_t **cachetailpp; /* cache tail */ 531 md_sp_t *msp; /* unit structure */ 532 int count = 0; /* count of sp's */ 533 int err; 534 mdname_t *curnp; 535 536 if ((cached_list != NULL) && (!force)) { 537 /* return a copy of the cached list */ 538 for (namep = cached_list; namep != NULL; namep = namep->next) 539 tailpp = meta_namelist_append_wrapper(tailpp, 540 namep->namep); 541 return (cached_count); 542 } 543 544 /* free the cache and reset values to zeros to prepare for a new list */ 545 metafreenamelist(cached_list); 546 cached_count = 0; 547 cached_list = NULL; 548 cachetailpp = &cached_list; 549 *nlpp = NULL; 550 551 /* get all the softpartitions first of all */ 552 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 553 return (-1); 554 555 /* 556 * Now for each sp, see if it resides on the component we 557 * are interested in, if so then add it to our list 558 */ 559 for (namep = spnlp; namep != NULL; namep = namep->next) { 560 curnp = namep->namep; 561 562 /* get the unit structure */ 563 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 564 continue; 565 566 /* 567 * If the current soft partition is not on the same 568 * component, continue the search. If it is on the same 569 * component, add it to our namelist. 570 */ 571 err = meta_check_samedrive(compnp, msp->compnamep, ep); 572 if (err <= 0) { 573 /* not on the same device, check the next one */ 574 continue; 575 } 576 577 /* it's on the same drive */ 578 579 /* 580 * Check for overlapping partitions if the component is not 581 * a metadevice. 582 */ 583 if (!metaismeta(msp->compnamep)) { 584 /* 585 * if they're on the same drive, neither 586 * should be a metadevice if one isn't 587 */ 588 assert(!metaismeta(compnp)); 589 590 if (meta_check_overlap(msp->compnamep->cname, 591 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 592 continue; 593 594 /* in this case it's not an error for them to overlap */ 595 mdclrerror(ep); 596 } 597 598 /* Component is on the same device, add to the used list */ 599 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 600 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 601 curnp); 602 603 ++count; 604 ++cached_count; 605 } 606 607 assert(count == cached_count); 608 return (count); 609 610 out: 611 metafreenamelist(*nlpp); 612 *nlpp = NULL; 613 return (-1); 614 } 615 616 /* 617 * FUNCTION: meta_sp_get_default_alignment() 618 * INPUT: sp - the pertinent set name 619 * compnp - the name of the underlying component 620 * OUTPUT: ep - return error pointer 621 * RETURNS: sp_ext_length_t =0: no default alignment 622 * >0: default alignment 623 * PURPOSE: returns the default alignment for soft partitions to 624 * be built on top of the specified component or 625 * metadevice 626 */ 627 static sp_ext_length_t 628 meta_sp_get_default_alignment( 629 mdsetname_t *sp, 630 mdname_t *compnp, 631 md_error_t *ep 632 ) 633 { 634 sp_ext_length_t a = SP_UNALIGNED; 635 char *mname; 636 637 assert(compnp != NULL); 638 639 /* 640 * We treat raw devices as opaque, and assume nothing about 641 * their alignment requirements. 642 */ 643 if (!metaismeta(compnp)) 644 return (SP_UNALIGNED); 645 646 /* 647 * We already know it's a metadevice from the previous test; 648 * metagetmiscname() will tell us which metadevice type we 649 * have 650 */ 651 mname = metagetmiscname(compnp, ep); 652 if (mname == NULL) 653 goto out; 654 655 /* 656 * For a mirror, we want to deal with the stripe that is the 657 * primary side. If it happens to be asymmetrically 658 * configured, there is no simple way to fake a universal 659 * alignment. There's a chance that the least common 660 * denominator of the set of interlaces from all stripes of 661 * all submirrors would do it, but nobody that really cared 662 * that much about this issue would create an asymmetric 663 * config to start with. 664 * 665 * If the component underlying the soft partition is a mirror, 666 * then at the exit of this loop, compnp will have been 667 * updated to describe the first active submirror. 668 */ 669 if (strcmp(mname, MD_MIRROR) == 0) { 670 md_mirror_t *mp; 671 int smi; 672 md_submirror_t *smp; 673 674 mp = meta_get_mirror(sp, compnp, ep); 675 if (mp == NULL) 676 goto out; 677 678 for (smi = 0; smi < NMIRROR; smi++) { 679 680 smp = &mp->submirrors[smi]; 681 if (smp->state == SMS_UNUSED) 682 continue; 683 684 compnp = smp->submirnamep; 685 assert(compnp != NULL); 686 687 mname = metagetmiscname(compnp, ep); 688 if (mname == NULL) 689 goto out; 690 691 break; 692 } 693 694 if (smi == NMIRROR) 695 goto out; 696 } 697 698 /* 699 * Handle stripes and submirrors identically; just return the 700 * interlace of the first row. 701 */ 702 if (strcmp(mname, MD_STRIPE) == 0) { 703 md_stripe_t *stp; 704 705 stp = meta_get_stripe(sp, compnp, ep); 706 if (stp == NULL) 707 goto out; 708 709 a = stp->rows.rows_val[0].interlace; 710 goto out; 711 } 712 713 /* 714 * Raid is even more straightforward; the interlace applies to 715 * the entire device. 716 */ 717 if (strcmp(mname, MD_RAID) == 0) { 718 md_raid_t *rp; 719 720 rp = meta_get_raid(sp, compnp, ep); 721 if (rp == NULL) 722 goto out; 723 724 a = rp->interlace; 725 goto out; 726 } 727 728 /* 729 * If we have arrived here with the alignment still not set, 730 * then we expect the error to have been set by one of the 731 * routines we called. If neither is the case, something has 732 * really gone wrong above. (Probably the submirror walk 733 * failed to produce a valid submirror, but that would be 734 * really bad...) 735 */ 736 out: 737 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 738 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 739 740 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 741 mde_perror(ep, NULL); 742 } 743 744 assert((a > 0) || (!mdisok(ep))); 745 746 return (a); 747 } 748 749 750 751 /* 752 * FUNCTION: meta_check_insp() 753 * INPUT: sp - the set name for the device to check 754 * np - the name of the device to check 755 * slblk - the starting offset of the device to check 756 * nblks - the number of blocks in the device to check 757 * OUTPUT: ep - return error pointer 758 * RETURNS: int - 0 - device contains soft partitions 759 * -1 - device does not contain soft partitions 760 * PURPOSE: determines whether a device contains any soft partitions 761 */ 762 /* ARGSUSED */ 763 int 764 meta_check_insp( 765 mdsetname_t *sp, 766 mdname_t *np, 767 diskaddr_t slblk, 768 diskaddr_t nblks, 769 md_error_t *ep 770 ) 771 { 772 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 773 int count; 774 int rval; 775 776 /* check set pointer */ 777 assert(sp != NULL); 778 779 /* 780 * Get a list of the soft partitions that currently reside on 781 * the component. We should ALWAYS force reload the cache, 782 * because if we're using the md.tab, we must rebuild 783 * the list because it won't contain the previous (if any) 784 * soft partition. 785 */ 786 /* find all soft partitions on the component */ 787 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 788 789 if (count == -1) { 790 rval = -1; 791 } else if (count > 0) { 792 rval = mduseerror(ep, MDE_ALREADY, np->dev, 793 spnlp->namep->cname, np->cname); 794 } else { 795 rval = 0; 796 } 797 798 metafreenamelist(spnlp); 799 return (rval); 800 } 801 802 /* 803 * ************************************************************************** 804 * Extent List Manipulation Functions * 805 * ************************************************************************** 806 */ 807 808 /* 809 * FUNCTION: meta_sp_cmp_by_nameseq() 810 * INPUT: e1 - first node to compare 811 * e2 - second node to compare 812 * OUTPUT: none 813 * RETURNS: int - =0 - nodes are equal 814 * <0 - e1 should go before e2 815 * >0 - e1 should go after e2 816 * PURPOSE: used for sorted list inserts to build a list sorted by 817 * name first and sequence number second. 818 */ 819 static int 820 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 821 { 822 int rval; 823 824 if (e1->ext_namep == NULL) 825 return (1); 826 if (e2->ext_namep == NULL) 827 return (-1); 828 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 829 return (rval); 830 831 /* the names are equal, compare sequence numbers */ 832 if (e1->ext_seq > e2->ext_seq) 833 return (1); 834 if (e1->ext_seq < e2->ext_seq) 835 return (-1); 836 /* sequence numbers are also equal */ 837 return (0); 838 } 839 840 /* 841 * FUNCTION: meta_sp_cmp_by_offset() 842 * INPUT: e1 - first node to compare 843 * e2 - second node to compare 844 * OUTPUT: none 845 * RETURNS: int - =0 - nodes are equal 846 * <0 - e1 should go before e2 847 * >0 - e1 should go after e2 848 * PURPOSE: used for sorted list inserts to build a list sorted by offset 849 */ 850 static int 851 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 852 { 853 if (e1->ext_offset > e2->ext_offset) 854 return (1); 855 if (e1->ext_offset < e2->ext_offset) 856 return (-1); 857 /* offsets are equal */ 858 return (0); 859 } 860 861 /* 862 * FUNCTION: meta_sp_list_insert() 863 * INPUT: sp - the set name for the device the node belongs to 864 * np - the name of the device the node belongs to 865 * head - the head of the list, must be NULL for empty list 866 * offset - the physical offset of this extent in sectors 867 * length - the length of this extent in sectors 868 * type - the type of the extent being inserted 869 * seq - the sequence number of the extent being inserted 870 * flags - extent flags (eg. whether it needs to be updated) 871 * compare - the compare function to use 872 * OUTPUT: head - points to the new head if a node was inserted 873 * at the beginning 874 * RETURNS: void 875 * PURPOSE: inserts an extent node into a sorted doubly linked list. 876 * The sort order is determined by the compare function. 877 * Memory is allocated for the node in this function and it 878 * is up to the caller to free it, possibly using 879 * meta_sp_list_free(). If a node is inserted at the 880 * beginning of the list, the head pointer is updated to 881 * point to the new first node. 882 */ 883 static void 884 meta_sp_list_insert( 885 mdsetname_t *sp, 886 mdname_t *np, 887 sp_ext_node_t **head, 888 sp_ext_offset_t offset, 889 sp_ext_length_t length, 890 sp_ext_type_t type, 891 uint_t seq, 892 uint_t flags, 893 ext_cmpfunc_t compare 894 ) 895 { 896 sp_ext_node_t *newext; 897 sp_ext_node_t *curext; 898 899 assert(head != NULL); 900 901 /* Don't bother adding zero length nodes */ 902 if (length == 0ULL) 903 return; 904 905 /* allocate and fill in new ext_node */ 906 newext = Zalloc(sizeof (sp_ext_node_t)); 907 908 newext->ext_offset = offset; 909 newext->ext_length = length; 910 newext->ext_flags = flags; 911 newext->ext_type = type; 912 newext->ext_seq = seq; 913 newext->ext_setp = sp; 914 newext->ext_namep = np; 915 916 /* first node in the list */ 917 if (*head == NULL) { 918 newext->ext_next = newext->ext_prev = NULL; 919 *head = newext; 920 } else if ((*compare)(*head, newext) >= 0) { 921 /* the first node has a bigger offset, so insert before it */ 922 assert((*head)->ext_prev == NULL); 923 924 newext->ext_prev = NULL; 925 newext->ext_next = *head; 926 (*head)->ext_prev = newext; 927 *head = newext; 928 } else { 929 /* 930 * find the next node whose offset is greater than 931 * the one we want to insert, or the end of the list. 932 */ 933 for (curext = *head; 934 (curext->ext_next != NULL) && 935 ((*compare)(curext->ext_next, newext) < 0); 936 (curext = curext->ext_next)) 937 ; 938 939 /* link the new node in after the current node */ 940 newext->ext_next = curext->ext_next; 941 newext->ext_prev = curext; 942 943 if (curext->ext_next != NULL) 944 curext->ext_next->ext_prev = newext; 945 946 curext->ext_next = newext; 947 } 948 } 949 950 /* 951 * FUNCTION: meta_sp_list_free() 952 * INPUT: head - the head of the list, must be NULL for empty list 953 * OUTPUT: head - points to NULL on return 954 * RETURNS: void 955 * PURPOSE: walks a double linked extent list and frees each node 956 */ 957 static void 958 meta_sp_list_free(sp_ext_node_t **head) 959 { 960 sp_ext_node_t *ext; 961 sp_ext_node_t *next; 962 963 assert(head != NULL); 964 965 ext = *head; 966 while (ext) { 967 next = ext->ext_next; 968 Free(ext); 969 ext = next; 970 } 971 *head = NULL; 972 } 973 974 /* 975 * FUNCTION: meta_sp_list_remove() 976 * INPUT: head - the head of the list, must be NULL for empty list 977 * ext - the extent to remove, must be a member of the list 978 * OUTPUT: head - points to the new head of the list 979 * RETURNS: void 980 * PURPOSE: unlinks the node specified by ext from the list and 981 * frees it, possibly moving the head pointer forward if 982 * the head is the node being removed. 983 */ 984 static void 985 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 986 { 987 assert(head != NULL); 988 assert(*head != NULL); 989 990 if (*head == ext) 991 *head = ext->ext_next; 992 993 if (ext->ext_prev != NULL) 994 ext->ext_prev->ext_next = ext->ext_next; 995 if (ext->ext_next != NULL) 996 ext->ext_next->ext_prev = ext->ext_prev; 997 Free(ext); 998 } 999 1000 /* 1001 * FUNCTION: meta_sp_list_size() 1002 * INPUT: head - the head of the list, must be NULL for empty list 1003 * exttype - the type of the extents to sum 1004 * exclude_wm - subtract space for extent headers from total 1005 * OUTPUT: none 1006 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1007 * PURPOSE: sums the lengths of all extents in the list matching the 1008 * specified type. This could be used for computing the 1009 * amount of free or used space, for example. 1010 */ 1011 static sp_ext_length_t 1012 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1013 { 1014 sp_ext_node_t *ext; 1015 sp_ext_length_t size = 0LL; 1016 1017 for (ext = head; ext != NULL; ext = ext->ext_next) 1018 if (ext->ext_type == exttype) 1019 size += ext->ext_length - 1020 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1021 1022 return (size); 1023 } 1024 1025 /* 1026 * FUNCTION: meta_sp_list_find() 1027 * INPUT: head - the head of the list, must be NULL for empty list 1028 * offset - the offset contained by the node to find 1029 * OUTPUT: none 1030 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1031 * or NULL if no such nodes were found. 1032 * PURPOSE: finds a node in a list containing the requested offset 1033 * (inclusive). If multiple nodes contain this offset then 1034 * only the first will be returned, though typically these 1035 * lists are managed with non-overlapping nodes. 1036 * 1037 * *The list MUST be sorted by offset for this function to work.* 1038 */ 1039 static sp_ext_node_t * 1040 meta_sp_list_find( 1041 sp_ext_node_t *head, 1042 sp_ext_offset_t offset 1043 ) 1044 { 1045 sp_ext_node_t *ext; 1046 1047 for (ext = head; ext != NULL; ext = ext->ext_next) { 1048 /* check if the offset lies within this extent */ 1049 if ((offset >= ext->ext_offset) && 1050 (offset < ext->ext_offset + ext->ext_length)) { 1051 /* 1052 * the requested extent should always be a 1053 * subset of an extent in the list. 1054 */ 1055 return (ext); 1056 } 1057 } 1058 return (NULL); 1059 } 1060 1061 /* 1062 * FUNCTION: meta_sp_list_freefill() 1063 * INPUT: head - the head of the list, must be NULL for empty list 1064 * size - the size of the volume this extent list is 1065 * representing 1066 * OUTPUT: head - the new head of the list 1067 * RETURNS: void 1068 * PURPOSE: finds gaps in the extent list and fills them with a free 1069 * node. If there is a gap at the beginning the head 1070 * pointer will be changed to point to the new free node. 1071 * If there is free space at the end, the last free extent 1072 * will extend all the way out to the size specified. 1073 * 1074 * *The list MUST be sorted by offset for this function to work.* 1075 */ 1076 static void 1077 meta_sp_list_freefill( 1078 sp_ext_node_t **head, 1079 sp_ext_length_t size 1080 ) 1081 { 1082 sp_ext_node_t *ext; 1083 sp_ext_offset_t curoff = 0LL; 1084 1085 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1086 if (curoff < ext->ext_offset) 1087 meta_sp_list_insert(NULL, NULL, head, 1088 curoff, ext->ext_offset - curoff, 1089 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1090 curoff = ext->ext_offset + ext->ext_length; 1091 } 1092 1093 /* pad inverse list out to the end */ 1094 if (curoff < size) 1095 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1096 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1097 1098 if (getenv(META_SP_DEBUG)) { 1099 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1100 "holes freefilled:\n"); 1101 meta_sp_list_dump(*head); 1102 } 1103 } 1104 1105 /* 1106 * FUNCTION: meta_sp_list_dump() 1107 * INPUT: head - the head of the list, must be NULL for empty list 1108 * OUTPUT: none 1109 * RETURNS: void 1110 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1111 */ 1112 static void 1113 meta_sp_list_dump(sp_ext_node_t *head) 1114 { 1115 sp_ext_node_t *ext; 1116 1117 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1118 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1119 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1120 "Next"); 1121 for (ext = head; ext != NULL; ext = ext->ext_next) { 1122 if (ext->ext_namep != NULL) 1123 meta_sp_debug("%5s", ext->ext_namep->cname); 1124 else 1125 meta_sp_debug("%5s", "NONE"); 1126 1127 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1128 switch (ext->ext_type) { 1129 case EXTTYP_ALLOC: 1130 meta_sp_debug("%7s ", "ALLOC"); 1131 break; 1132 case EXTTYP_FREE: 1133 meta_sp_debug("%7s ", "FREE"); 1134 break; 1135 case EXTTYP_END: 1136 meta_sp_debug("%7s ", "END"); 1137 break; 1138 case EXTTYP_RESERVED: 1139 meta_sp_debug("%7s ", "RESV"); 1140 break; 1141 default: 1142 meta_sp_debug("%7s ", "INVLD"); 1143 break; 1144 } 1145 1146 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1147 ext->ext_offset, ext->ext_length, 1148 ext->ext_flags, (void *) ext->ext_prev, 1149 (void *) ext->ext_next); 1150 } 1151 meta_sp_debug("\n"); 1152 } 1153 1154 /* 1155 * FUNCTION: meta_sp_list_overlaps() 1156 * INPUT: head - the head of the list, must be NULL for empty list 1157 * OUTPUT: none 1158 * RETURNS: int - 1 if extents overlap, 0 if ok 1159 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1160 * offset for this function to work properly. 1161 */ 1162 static int 1163 meta_sp_list_overlaps(sp_ext_node_t *head) 1164 { 1165 sp_ext_node_t *ext; 1166 1167 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1168 if (ext->ext_offset + ext->ext_length > 1169 ext->ext_next->ext_offset) 1170 return (1); 1171 } 1172 return (0); 1173 } 1174 1175 /* 1176 * ************************************************************************** 1177 * Extent Allocation Functions * 1178 * ************************************************************************** 1179 */ 1180 1181 /* 1182 * FUNCTION: meta_sp_alloc_by_ext() 1183 * INPUT: sp - the set name for the device the node belongs to 1184 * np - the name of the device the node belongs to 1185 * head - the head of the list, must be NULL for empty list 1186 * free_ext - the free extent being allocated from 1187 * alloc_offset - the offset of the allocation 1188 * alloc_len - the length of the allocation 1189 * seq - the sequence number of the allocation 1190 * OUTPUT: head - the new head pointer 1191 * RETURNS: void 1192 * PURPOSE: allocates a portion of the free extent free_ext. The 1193 * allocated portion starts at alloc_offset and is 1194 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1195 * alloc_length) must be contained within the free extent. 1196 * 1197 * The free extent is split into as many as 3 pieces - a 1198 * free extent containing [ free_offset .. alloc_offset ), an 1199 * allocated extent containing the range [ alloc_offset .. 1200 * alloc_end ], and another free extent containing the 1201 * range ( alloc_end .. free_end ]. If either of the two 1202 * new free extents would be zero length, they are not created. 1203 * 1204 * Finally, the original free extent is removed. All newly 1205 * created extents have the EXTFLG_UPDATE flag set. 1206 */ 1207 static void 1208 meta_sp_alloc_by_ext( 1209 mdsetname_t *sp, 1210 mdname_t *np, 1211 sp_ext_node_t **head, 1212 sp_ext_node_t *free_ext, 1213 sp_ext_offset_t alloc_offset, 1214 sp_ext_length_t alloc_length, 1215 uint_t seq 1216 ) 1217 { 1218 sp_ext_offset_t free_offset = free_ext->ext_offset; 1219 sp_ext_length_t free_length = free_ext->ext_length; 1220 1221 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1222 sp_ext_offset_t free_end = free_offset + free_length; 1223 1224 /* allocated extent must be a subset of the free extent */ 1225 assert(free_offset <= alloc_offset); 1226 assert(free_end >= alloc_end); 1227 1228 meta_sp_list_remove(head, free_ext); 1229 1230 if (free_offset < alloc_offset) { 1231 meta_sp_list_insert(NULL, NULL, head, free_offset, 1232 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1233 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1234 } 1235 1236 if (free_end > alloc_end) { 1237 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1238 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1239 meta_sp_cmp_by_offset); 1240 } 1241 1242 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1243 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1244 1245 if (getenv(META_SP_DEBUG)) { 1246 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1247 meta_sp_list_dump(*head); 1248 } 1249 } 1250 1251 /* 1252 * FUNCTION: meta_sp_alloc_by_len() 1253 * INPUT: sp - the set name for the device the node belongs to 1254 * np - the name of the device the node belongs to 1255 * head - the head of the list, must be NULL for empty list 1256 * *lp - the requested length to allocate 1257 * last_off - the last offset already allocated. 1258 * alignment - the desired extent alignmeent 1259 * OUTPUT: head - the new head pointer 1260 * *lp - the length allocated 1261 * RETURNS: int - -1 if error, the number of new extents on success 1262 * PURPOSE: allocates extents from free space to satisfy the requested 1263 * length. If requested length is zero, allocates all 1264 * remaining free space. This function provides the meat 1265 * of the extent allocation algorithm. Allocation is a 1266 * three tier process: 1267 * 1268 * 1. If last_off is nonzero and there is free space following 1269 * that node, then it is extended to allocate as much of that 1270 * free space as possible. This is useful for metattach. 1271 * 2. If a free extent can be found to satisfy the remaining 1272 * requested space, then satisfy the rest of the request 1273 * from that extent. 1274 * 3. Start allocating space from any remaining free extents until 1275 * the remainder of the request is satisified. 1276 * 1277 * If alignment is non-zero, then every extent modified 1278 * or newly allocated will be aligned modulo alignment, 1279 * with a length that is an integer multiple of 1280 * alignment. 1281 * 1282 * The EXTFLG_UPDATE flag is set for all nodes (free and 1283 * allocated) that require updated watermarks. 1284 * 1285 * This algorithm may have a negative impact on fragmentation 1286 * in pathological cases and may be improved if it turns out 1287 * to be a problem. This may be exacerbated by particularly 1288 * large alignments. 1289 * 1290 * NOTE: It's confusing, so it demands an explanation: 1291 * - len is used to represent requested data space; it 1292 * does not include room for a watermark. On each full 1293 * or partial allocation, len will be decremented by 1294 * alloc_len (see next paragraph) until it reaches 1295 * zero. 1296 * - alloc_len is used to represent data space allocated 1297 * from a particular extent; it does not include space 1298 * for a watermark. In the rare event that a_length 1299 * (see next paragraph) is equal to MD_SP_WMSIZE, 1300 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1301 * fragment of space will be utterly unusable. 1302 * - a_length is used to represent all space to be 1303 * allocated from a particular extent; it DOES include 1304 * space for a watermark. 1305 */ 1306 static int 1307 meta_sp_alloc_by_len( 1308 mdsetname_t *sp, 1309 mdname_t *np, 1310 sp_ext_node_t **head, 1311 sp_ext_length_t *lp, 1312 sp_ext_offset_t last_off, 1313 sp_ext_offset_t alignment 1314 ) 1315 { 1316 sp_ext_node_t *free_ext; 1317 sp_ext_node_t *alloc_ext; 1318 uint_t last_seq = 0; 1319 uint_t numexts = 0; 1320 sp_ext_length_t freespace; 1321 sp_ext_length_t alloc_len; 1322 sp_ext_length_t len; 1323 1324 /* We're DOA if we can't read *lp */ 1325 assert(lp != NULL); 1326 len = *lp; 1327 1328 /* 1329 * Process the nominal case first: we've been given an actual 1330 * size argument, rather than the literal "all" 1331 */ 1332 1333 if (len != 0) { 1334 1335 /* 1336 * Short circuit the check for free space. This may 1337 * tell us we have enough space when we really don't 1338 * because each extent loses space to a watermark, but 1339 * it will always tell us there isn't enough space 1340 * correctly. Worst case we do some extra work. 1341 */ 1342 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1343 INCLUDE_WM); 1344 1345 if (freespace < len) 1346 return (-1); 1347 1348 /* 1349 * First see if we can extend the last extent for an 1350 * attach. 1351 */ 1352 if (last_off != 0LL) { 1353 int align = 0; 1354 1355 alloc_ext = 1356 meta_sp_list_find(*head, last_off); 1357 assert(alloc_ext != NULL); 1358 1359 /* 1360 * The offset test reflects the 1361 * inclusion of the watermark in the extent 1362 */ 1363 align = (alignment > 0) && 1364 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1365 alignment) == 0); 1366 1367 /* 1368 * If we decided not to align here, we should 1369 * also reset "alignment" so we don't bother 1370 * later, either. 1371 */ 1372 if (!align) { 1373 alignment = 0; 1374 } 1375 1376 last_seq = alloc_ext->ext_seq; 1377 1378 free_ext = meta_sp_list_find(*head, 1379 alloc_ext->ext_offset + 1380 alloc_ext->ext_length); 1381 1382 /* 1383 * If a free extent follows our last allocated 1384 * extent, then remove the last allocated 1385 * extent and increase the size of the free 1386 * extent to overlap it, then allocate the 1387 * total space from the new free extent. 1388 */ 1389 if (free_ext != NULL && 1390 free_ext->ext_type == EXTTYP_FREE) { 1391 assert(free_ext->ext_offset == 1392 alloc_ext->ext_offset + 1393 alloc_ext->ext_length); 1394 1395 alloc_len = 1396 MIN(len, free_ext->ext_length); 1397 1398 if (align && (alloc_len < len)) { 1399 /* No watermark space needed */ 1400 alloc_len -= alloc_len % alignment; 1401 } 1402 1403 if (alloc_len > 0) { 1404 free_ext->ext_offset -= 1405 alloc_ext->ext_length; 1406 free_ext->ext_length += 1407 alloc_ext->ext_length; 1408 1409 meta_sp_alloc_by_ext(sp, np, head, 1410 free_ext, free_ext->ext_offset, 1411 alloc_ext->ext_length + alloc_len, 1412 last_seq); 1413 1414 /* 1415 * now remove the original allocated 1416 * node. We may have overlapping 1417 * extents for a short time before 1418 * this node is removed. 1419 */ 1420 meta_sp_list_remove(head, alloc_ext); 1421 len -= alloc_len; 1422 } 1423 } 1424 last_seq++; 1425 } 1426 1427 if (len == 0LL) 1428 goto out; 1429 1430 /* 1431 * Next, see if we can find a single allocation for 1432 * the remainder. This may make fragmentation worse 1433 * in some cases, but there's no good way to allocate 1434 * that doesn't have a highly fragmented corner case. 1435 */ 1436 for (free_ext = *head; free_ext != NULL; 1437 free_ext = free_ext->ext_next) { 1438 sp_ext_offset_t a_offset; 1439 sp_ext_offset_t a_length; 1440 1441 if (free_ext->ext_type != EXTTYP_FREE) 1442 continue; 1443 1444 /* 1445 * The length test should include space for 1446 * the watermark 1447 */ 1448 1449 a_offset = free_ext->ext_offset; 1450 a_length = free_ext->ext_length; 1451 1452 if (alignment > 0) { 1453 1454 /* 1455 * Shortcut for extents that have been 1456 * previously added to pad out the 1457 * data space 1458 */ 1459 if (a_length < alignment) { 1460 continue; 1461 } 1462 1463 /* 1464 * Round up so the data space begins 1465 * on a properly aligned boundary. 1466 */ 1467 a_offset += alignment - 1468 (a_offset % alignment) - MD_SP_WMSIZE; 1469 1470 /* 1471 * This is only necessary in case the 1472 * watermark size is ever greater than 1473 * one. It'll never happen, of 1474 * course; we'll get rid of watermarks 1475 * before we make 'em bigger. 1476 */ 1477 if (a_offset < free_ext->ext_offset) { 1478 a_offset += alignment; 1479 } 1480 1481 /* 1482 * Adjust the length to account for 1483 * the space lost above (if any) 1484 */ 1485 a_length -= 1486 (a_offset - free_ext->ext_offset); 1487 } 1488 1489 if (a_length >= len + MD_SP_WMSIZE) { 1490 meta_sp_alloc_by_ext(sp, np, head, 1491 free_ext, a_offset, 1492 len + MD_SP_WMSIZE, last_seq); 1493 1494 len = 0LL; 1495 numexts++; 1496 break; 1497 } 1498 } 1499 1500 if (len == 0LL) 1501 goto out; 1502 1503 1504 /* 1505 * If the request could not be satisfied by extending 1506 * the last extent or by a single extent, then put 1507 * multiple smaller extents together until the request 1508 * is satisfied. 1509 */ 1510 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1511 free_ext = free_ext->ext_next) { 1512 sp_ext_offset_t a_offset; 1513 sp_ext_length_t a_length; 1514 1515 if (free_ext->ext_type != EXTTYP_FREE) 1516 continue; 1517 1518 a_offset = free_ext->ext_offset; 1519 a_length = free_ext->ext_length; 1520 1521 if (alignment > 0) { 1522 1523 /* 1524 * Shortcut for extents that have been 1525 * previously added to pad out the 1526 * data space 1527 */ 1528 if (a_length < alignment) { 1529 continue; 1530 } 1531 1532 /* 1533 * Round up so the data space begins 1534 * on a properly aligned boundary. 1535 */ 1536 a_offset += alignment - 1537 (a_offset % alignment) - MD_SP_WMSIZE; 1538 1539 /* 1540 * This is only necessary in case the 1541 * watermark size is ever greater than 1542 * one. It'll never happen, of 1543 * course; we'll get rid of watermarks 1544 * before we make 'em bigger. 1545 */ 1546 if (a_offset < free_ext->ext_offset) { 1547 a_offset += alignment; 1548 } 1549 1550 /* 1551 * Adjust the length to account for 1552 * the space lost above (if any) 1553 */ 1554 a_length -= 1555 (a_offset - free_ext->ext_offset); 1556 1557 /* 1558 * Adjust the length to be properly 1559 * aligned if it is NOT to be the 1560 * last extent in the soft partition. 1561 */ 1562 if ((a_length - MD_SP_WMSIZE) < len) 1563 a_length -= 1564 (a_length - MD_SP_WMSIZE) 1565 % alignment; 1566 } 1567 1568 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1569 if (alloc_len == 0) 1570 continue; 1571 1572 /* 1573 * meta_sp_alloc_by_ext() expects the 1574 * allocation length to include the watermark 1575 * size, which is why we don't simply pass in 1576 * alloc_len here. 1577 */ 1578 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1579 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1580 last_seq); 1581 1582 len -= alloc_len; 1583 numexts++; 1584 last_seq++; 1585 } 1586 1587 1588 /* 1589 * If there was not enough space we can throw it all 1590 * away since no real work has been done yet. 1591 */ 1592 if (len != 0) { 1593 meta_sp_list_free(head); 1594 return (-1); 1595 } 1596 } 1597 1598 /* 1599 * Otherwise, the literal "all" was specified: allocate all 1600 * available free space. Don't bother with alignment. 1601 */ 1602 else { 1603 /* First, extend the last extent if this is a grow */ 1604 if (last_off != 0LL) { 1605 alloc_ext = 1606 meta_sp_list_find(*head, last_off); 1607 assert(alloc_ext != NULL); 1608 1609 last_seq = alloc_ext->ext_seq; 1610 1611 free_ext = meta_sp_list_find(*head, 1612 alloc_ext->ext_offset + 1613 alloc_ext->ext_length); 1614 1615 /* 1616 * If a free extent follows our last allocated 1617 * extent, then remove the last allocated 1618 * extent and increase the size of the free 1619 * extent to overlap it, then allocate the 1620 * total space from the new free extent. 1621 */ 1622 if (free_ext != NULL && 1623 free_ext->ext_type == EXTTYP_FREE) { 1624 assert(free_ext->ext_offset == 1625 alloc_ext->ext_offset + 1626 alloc_ext->ext_length); 1627 1628 len = alloc_len = 1629 free_ext->ext_length; 1630 1631 free_ext->ext_offset -= 1632 alloc_ext->ext_length; 1633 free_ext->ext_length += 1634 alloc_ext->ext_length; 1635 1636 meta_sp_alloc_by_ext(sp, np, head, 1637 free_ext, free_ext->ext_offset, 1638 alloc_ext->ext_length + alloc_len, 1639 last_seq); 1640 1641 /* 1642 * now remove the original allocated 1643 * node. We may have overlapping 1644 * extents for a short time before 1645 * this node is removed. 1646 */ 1647 meta_sp_list_remove(head, alloc_ext); 1648 } 1649 1650 last_seq++; 1651 } 1652 1653 /* Next, grab all remaining free space */ 1654 for (free_ext = *head; free_ext != NULL; 1655 free_ext = free_ext->ext_next) { 1656 1657 if (free_ext->ext_type == EXTTYP_FREE) { 1658 alloc_len = 1659 free_ext->ext_length - MD_SP_WMSIZE; 1660 if (alloc_len == 0) 1661 continue; 1662 1663 /* 1664 * meta_sp_alloc_by_ext() expects the 1665 * allocation length to include the 1666 * watermark size, which is why we 1667 * don't simply pass in alloc_len 1668 * here. 1669 */ 1670 meta_sp_alloc_by_ext(sp, np, head, 1671 free_ext, free_ext->ext_offset, 1672 free_ext->ext_length, 1673 last_seq); 1674 1675 len += alloc_len; 1676 numexts++; 1677 last_seq++; 1678 } 1679 } 1680 } 1681 1682 out: 1683 if (getenv(META_SP_DEBUG)) { 1684 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1685 "allocation:\n"); 1686 meta_sp_list_dump(*head); 1687 } 1688 1689 if (*lp == 0) { 1690 *lp = len; 1691 1692 /* 1693 * Make sure the callers hit a no space error if we 1694 * didn't actually find anything. 1695 */ 1696 if (len == 0) { 1697 return (-1); 1698 } 1699 } 1700 1701 return (numexts); 1702 } 1703 1704 /* 1705 * FUNCTION: meta_sp_alloc_by_list() 1706 * INPUT: sp - the set name for the device the node belongs to 1707 * np - the name of the device the node belongs to 1708 * head - the head of the list, must be NULL for empty list 1709 * oblist - an extent list containing requested nodes to allocate 1710 * OUTPUT: head - the new head pointer 1711 * RETURNS: int - -1 if error, the number of new extents on success 1712 * PURPOSE: allocates extents from free space to satisfy the requested 1713 * extent list. This is primarily used for the -o/-b options 1714 * where the user may specifically request extents to allocate. 1715 * Each extent in the oblist must be a subset (inclusive) of a 1716 * free extent and may not overlap each other. This 1717 * function sets the EXTFLG_UPDATE flag for each node that 1718 * requires a watermark update after allocating. 1719 */ 1720 static int 1721 meta_sp_alloc_by_list( 1722 mdsetname_t *sp, 1723 mdname_t *np, 1724 sp_ext_node_t **head, 1725 sp_ext_node_t *oblist 1726 ) 1727 { 1728 sp_ext_node_t *ext; 1729 sp_ext_node_t *free_ext; 1730 uint_t numexts = 0; 1731 1732 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1733 1734 free_ext = meta_sp_list_find(*head, 1735 ext->ext_offset - MD_SP_WMSIZE); 1736 1737 /* Make sure the allocation is within the free extent */ 1738 if ((free_ext == NULL) || 1739 (ext->ext_offset + ext->ext_length > 1740 free_ext->ext_offset + free_ext->ext_length) || 1741 (free_ext->ext_type != EXTTYP_FREE)) 1742 return (-1); 1743 1744 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1745 ext->ext_offset - MD_SP_WMSIZE, 1746 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1747 1748 numexts++; 1749 } 1750 1751 assert(meta_sp_list_overlaps(*head) == 0); 1752 1753 if (getenv(META_SP_DEBUG)) { 1754 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1755 "allocation:\n"); 1756 meta_sp_list_dump(*head); 1757 } 1758 1759 return (numexts); 1760 } 1761 1762 /* 1763 * ************************************************************************** 1764 * Extent List Population Functions * 1765 * ************************************************************************** 1766 */ 1767 1768 /* 1769 * FUNCTION: meta_sp_extlist_from_namelist() 1770 * INPUT: sp - the set name for the device the node belongs to 1771 * spnplp - the namelist of soft partitions to build a list from 1772 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1773 * ep - return error pointer 1774 * RETURNS: int - -1 if error, 0 on success 1775 * PURPOSE: builds an extent list representing the soft partitions 1776 * specified in the namelist. Each extent in each soft 1777 * partition is added to the list with the type EXTTYP_ALLOC. 1778 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1779 * extent in the list includes the space occupied by the 1780 * watermark, which is not included in the unit structures. 1781 */ 1782 static int 1783 meta_sp_extlist_from_namelist( 1784 mdsetname_t *sp, 1785 mdnamelist_t *spnlp, 1786 sp_ext_node_t **extlist, 1787 md_error_t *ep 1788 ) 1789 { 1790 int extn; 1791 md_sp_t *msp; /* unit structure of the sp's */ 1792 mdnamelist_t *namep; 1793 1794 assert(sp != NULL); 1795 1796 /* 1797 * Now go through the soft partitions and add a node to the used 1798 * list for each allocated extent. 1799 */ 1800 for (namep = spnlp; namep != NULL; namep = namep->next) { 1801 mdname_t *curnp = namep->namep; 1802 1803 /* get the unit structure */ 1804 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1805 return (-1); 1806 1807 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1808 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1809 1810 /* 1811 * subtract from offset and add to the length 1812 * to account for the watermark, which is not 1813 * contained in the extents in the unit structure. 1814 */ 1815 meta_sp_list_insert(sp, curnp, extlist, 1816 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1817 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1818 } 1819 } 1820 return (0); 1821 } 1822 1823 /* 1824 * FUNCTION: meta_sp_extlist_from_wm() 1825 * INPUT: sp - the set name for the device the node belongs to 1826 * compnp - the name of the device to scan watermarks on 1827 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1828 * ep - return error pointer 1829 * RETURNS: int - -1 if error, 0 on success 1830 * PURPOSE: builds an extent list representing the soft partitions 1831 * specified in the namelist. Each extent in each soft 1832 * partition is added to the list with the type EXTTYP_ALLOC. 1833 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1834 * extent in the list includes the space occupied by the 1835 * watermark, which is not included in the unit structures. 1836 */ 1837 static int 1838 meta_sp_extlist_from_wm( 1839 mdsetname_t *sp, 1840 mdname_t *compnp, 1841 sp_ext_node_t **extlist, 1842 ext_cmpfunc_t compare, 1843 md_error_t *ep 1844 ) 1845 { 1846 mp_watermark_t wm; 1847 mdname_t *np = NULL; 1848 mdsetname_t *spsetp = NULL; 1849 sp_ext_offset_t cur_off; 1850 md_set_desc *sd; 1851 int init = 0; 1852 mdkey_t key; 1853 minor_t mnum; 1854 1855 if (!metaislocalset(sp)) { 1856 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1857 return (-1); 1858 } 1859 1860 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1861 return (-1); 1862 1863 for (;;) { 1864 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1865 return (-1); 1866 } 1867 1868 /* get the set and name pointers */ 1869 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1870 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1871 return (-1); 1872 } 1873 } 1874 1875 /* 1876 * For the MN set, meta_init_make_device needs to 1877 * be run on all the nodes so the entries for the 1878 * softpart device name and its comp can be created 1879 * in the same order in the replica namespace. If 1880 * we have it run on mdmn_do_iocset then the mddbs 1881 * will be out of sync between master node and slave 1882 * nodes. 1883 */ 1884 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1885 1886 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1887 md_mn_msg_addmdname_t *send_params; 1888 int result; 1889 md_mn_result_t *resp = NULL; 1890 int message_size; 1891 1892 message_size = sizeof (*send_params) + 1893 strlen(wm.wm_mdname) + 1; 1894 send_params = Zalloc(message_size); 1895 send_params->addmdname_setno = sp->setno; 1896 (void) strcpy(&send_params->addmdname_name[0], 1897 wm.wm_mdname); 1898 result = mdmn_send_message(sp->setno, 1899 MD_MN_MSG_ADDMDNAME, 1900 MD_MSGF_PANIC_WHEN_INCONSISTENT, 1901 (char *)send_params, message_size, &resp, 1902 ep); 1903 Free(send_params); 1904 if (resp != NULL) { 1905 if (resp->mmr_exitval != 0) { 1906 free_result(resp); 1907 return (-1); 1908 } 1909 free_result(resp); 1910 } 1911 if (result != 0) 1912 return (-1); 1913 } else { 1914 1915 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1916 if ((key = meta_init_make_device(&sp, 1917 wm.wm_mdname, ep)) <= 0) { 1918 return (-1); 1919 } 1920 init = 1; 1921 } 1922 } 1923 1924 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1925 if (np == NULL) { 1926 if (init) { 1927 if (meta_getnmentbykey(sp->setno, 1928 MD_SIDEWILD, key, NULL, &mnum, 1929 NULL, ep) != NULL) { 1930 (void) metaioctl(MD_IOCREM_DEV, 1931 &mnum, ep, NULL); 1932 } 1933 (void) del_self_name(sp, key, ep); 1934 } 1935 return (-1); 1936 } 1937 } 1938 1939 /* insert watermark into extent list */ 1940 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1941 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1942 EXTFLG_UPDATE, compare); 1943 1944 /* if we see the end watermark, we're done */ 1945 if (wm.wm_type == EXTTYP_END) 1946 break; 1947 1948 cur_off += wm.wm_length + 1; 1949 1950 /* clear out set and name pointers for next iteration */ 1951 np = NULL; 1952 spsetp = NULL; 1953 } 1954 1955 return (0); 1956 } 1957 1958 /* 1959 * ************************************************************************** 1960 * Print (metastat) Functions * 1961 * ************************************************************************** 1962 */ 1963 1964 /* 1965 * FUNCTION: meta_sp_short_print() 1966 * INPUT: msp - the unit structure to display 1967 * fp - the file pointer to send output to 1968 * options - print options from the command line processor 1969 * OUTPUT: ep - return error pointer 1970 * RETURNS: int - -1 if error, 0 on success 1971 * PURPOSE: display a short report of the soft partition in md.tab 1972 * form, primarily used for metastat -p. 1973 */ 1974 static int 1975 meta_sp_short_print( 1976 md_sp_t *msp, 1977 char *fname, 1978 FILE *fp, 1979 mdprtopts_t options, 1980 md_error_t *ep 1981 ) 1982 { 1983 int extn; 1984 1985 if (options & PRINT_LARGEDEVICES) { 1986 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1987 return (0); 1988 } 1989 1990 if (options & PRINT_FN) { 1991 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1992 return (0); 1993 } 1994 1995 /* print name and -p */ 1996 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1997 return (mdsyserror(ep, errno, fname)); 1998 1999 /* print the component */ 2000 /* 2001 * Always print the full path name 2002 */ 2003 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2004 return (mdsyserror(ep, errno, fname)); 2005 2006 /* print out each extent */ 2007 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2008 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2009 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2010 extp->len) == EOF) 2011 return (mdsyserror(ep, errno, fname)); 2012 } 2013 2014 if (fprintf(fp, "\n") == EOF) 2015 return (mdsyserror(ep, errno, fname)); 2016 2017 /* success */ 2018 return (0); 2019 } 2020 2021 /* 2022 * FUNCTION: meta_sp_status_to_name() 2023 * INPUT: xsp_status - the status value to convert to a string 2024 * tstate - transient errored device state. If set the 2025 * device is Unavailable 2026 * OUTPUT: none 2027 * RETURNS: char * - a pointer to the string representing the status value 2028 * PURPOSE: return an internationalized string representing the 2029 * status value for a soft partition. The strings are 2030 * strdup'd and must be freed by the caller. 2031 */ 2032 static char * 2033 meta_sp_status_to_name( 2034 xsp_status_t xsp_status, 2035 uint_t tstate 2036 ) 2037 { 2038 char *rval = NULL; 2039 2040 /* 2041 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 2042 * value for an 'Unavailable' return. tstate can be set because of 2043 * other multi-node reasons (e.g. ABR being set) 2044 */ 2045 if (tstate & MD_INACCESSIBLE) { 2046 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 2047 } 2048 2049 switch (xsp_status) { 2050 case MD_SP_CREATEPEND: 2051 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 2052 break; 2053 case MD_SP_GROWPEND: 2054 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 2055 break; 2056 case MD_SP_DELPEND: 2057 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 2058 break; 2059 case MD_SP_OK: 2060 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 2061 break; 2062 case MD_SP_ERR: 2063 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2064 break; 2065 case MD_SP_RECOVER: 2066 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2067 break; 2068 } 2069 2070 if (rval == NULL) 2071 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2072 2073 return (rval); 2074 } 2075 2076 /* 2077 * FUNCTION: meta_sp_report() 2078 * INPUT: sp - the set name for the unit being displayed 2079 * msp - the unit structure to display 2080 * nlpp - pass back the large devs 2081 * fp - the file pointer to send output to 2082 * options - print options from the command line processor 2083 * OUTPUT: ep - return error pointer 2084 * RETURNS: int - -1 if error, 0 on success 2085 * PURPOSE: print a full report of the device specified 2086 */ 2087 static int 2088 meta_sp_report( 2089 mdsetname_t *sp, 2090 md_sp_t *msp, 2091 mdnamelist_t **nlpp, 2092 char *fname, 2093 FILE *fp, 2094 mdprtopts_t options, 2095 md_error_t *ep 2096 ) 2097 { 2098 uint_t extn; 2099 char *status; 2100 char *devid = ""; 2101 mdname_t *didnp = NULL; 2102 ddi_devid_t dtp; 2103 int len; 2104 uint_t tstate = 0; 2105 2106 if (options & PRINT_LARGEDEVICES) { 2107 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) { 2108 return (0); 2109 } else { 2110 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2111 return (-1); 2112 } 2113 } 2114 2115 if (options & PRINT_FN) { 2116 if ((msp->common.revision & MD_FN_META_DEV) == 0) { 2117 return (0); 2118 } else { 2119 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2120 return (-1); 2121 } 2122 } 2123 2124 if (options & PRINT_HEADER) { 2125 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2126 msp->common.namep->cname) == EOF) 2127 return (mdsyserror(ep, errno, fname)); 2128 } 2129 2130 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2131 msp->compnamep->cname) == EOF) 2132 return (mdsyserror(ep, errno, fname)); 2133 2134 /* Determine if device is available before displaying status */ 2135 if (metaismeta(msp->common.namep)) { 2136 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2137 return (-1); 2138 } 2139 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2140 2141 /* print out "State" to be consistent with other metadevices */ 2142 if (tstate & MD_ABR_CAP) { 2143 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2144 " State: %s - Application Based Recovery (ABR)\n"), 2145 status) == EOF) { 2146 Free(status); 2147 return (mdsyserror(ep, errno, fname)); 2148 } 2149 } else { 2150 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2151 " State: %s\n"), status) == EOF) { 2152 Free(status); 2153 return (mdsyserror(ep, errno, fname)); 2154 } 2155 } 2156 free(status); 2157 2158 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2159 msp->common.size, 2160 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF) 2161 return (mdsyserror(ep, errno, fname)); 2162 2163 /* print component details */ 2164 if (! metaismeta(msp->compnamep)) { 2165 diskaddr_t start_blk; 2166 int has_mddb; 2167 char *has_mddb_str; 2168 2169 /* print header */ 2170 /* 2171 * Building a format string on the fly that will 2172 * be used in (f)printf. This allows the length 2173 * of the ctd to vary from small to large without 2174 * looking horrible. 2175 */ 2176 len = strlen(msp->compnamep->cname); 2177 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2178 len += 2; 2179 if (fprintf(fp, 2180 "\t%-*.*s %-12.12s %-5.5s %s\n", 2181 len, len, 2182 dgettext(TEXT_DOMAIN, "Device"), 2183 dgettext(TEXT_DOMAIN, "Start Block"), 2184 dgettext(TEXT_DOMAIN, "Dbase"), 2185 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2186 return (mdsyserror(ep, errno, fname)); 2187 } 2188 2189 2190 /* get info */ 2191 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2192 MD_DISKADDR_ERROR) 2193 return (-1); 2194 2195 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2196 return (-1); 2197 2198 if (has_mddb) 2199 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2200 else 2201 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2202 2203 /* populate the key in the name_p structure */ 2204 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2205 if (didnp == NULL) { 2206 return (-1); 2207 } 2208 2209 /* determine if devid does NOT exist */ 2210 if (options & PRINT_DEVID) { 2211 if ((dtp = meta_getdidbykey(sp->setno, 2212 getmyside(sp, ep), didnp->key, ep)) == NULL) 2213 devid = dgettext(TEXT_DOMAIN, "No "); 2214 else { 2215 devid = dgettext(TEXT_DOMAIN, "Yes"); 2216 free(dtp); 2217 } 2218 } 2219 2220 /* print info */ 2221 /* 2222 * This allows the length 2223 * of the ctd to vary from small to large without 2224 * looking horrible. 2225 */ 2226 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2227 len, msp->compnamep->cname, 2228 start_blk, has_mddb_str, devid) == EOF) { 2229 return (mdsyserror(ep, errno, fname)); 2230 } 2231 (void) fprintf(fp, "\n"); 2232 } 2233 2234 2235 /* print the headers */ 2236 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2237 dgettext(TEXT_DOMAIN, "Extent"), 2238 dgettext(TEXT_DOMAIN, "Start Block"), 2239 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2240 return (mdsyserror(ep, errno, fname)); 2241 2242 /* print out each extent */ 2243 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2244 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2245 2246 /* If PRINT_TIMES option is ever supported, add output here */ 2247 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2248 extn, extp->poff, extp->len) == EOF) 2249 return (mdsyserror(ep, errno, fname)); 2250 } 2251 2252 /* separate records with a newline */ 2253 (void) fprintf(fp, "\n"); 2254 return (0); 2255 } 2256 2257 /* 2258 * FUNCTION: meta_sp_print() 2259 * INPUT: sp - the set name for the unit being displayed 2260 * np - the name of the device to print 2261 * fname - ??? not used 2262 * fp - the file pointer to send output to 2263 * options - print options from the command line processor 2264 * OUTPUT: ep - return error pointer 2265 * RETURNS: int - -1 if error, 0 on success 2266 * PURPOSE: print a full report of the device specified by metastat. 2267 * This is the main entry point for printing. 2268 */ 2269 int 2270 meta_sp_print( 2271 mdsetname_t *sp, 2272 mdname_t *np, 2273 mdnamelist_t **nlpp, 2274 char *fname, 2275 FILE *fp, 2276 mdprtopts_t options, 2277 md_error_t *ep 2278 ) 2279 { 2280 md_sp_t *msp; 2281 md_unit_t *mdp; 2282 int rval = 0; 2283 2284 /* should always have the same set */ 2285 assert(sp != NULL); 2286 2287 /* print all the soft partitions */ 2288 if (np == NULL) { 2289 mdnamelist_t *nlp = NULL; 2290 mdnamelist_t *p; 2291 int cnt; 2292 2293 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2294 return (-1); 2295 else if (cnt == 0) 2296 return (0); 2297 2298 /* recusively print them out */ 2299 for (p = nlp; (p != NULL); p = p->next) { 2300 mdname_t *curnp = p->namep; 2301 2302 /* 2303 * one problem with the rval of -1 here is that 2304 * the error gets "lost" when the next device is 2305 * printed, but we want to print them all anyway. 2306 */ 2307 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2308 options, ep); 2309 } 2310 2311 /* clean up, return success */ 2312 metafreenamelist(nlp); 2313 return (rval); 2314 } 2315 2316 /* get the unit structure */ 2317 if ((msp = meta_get_sp_common(sp, np, 2318 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2319 return (-1); 2320 2321 /* check for parented */ 2322 if ((! (options & PRINT_SUBDEVS)) && 2323 (MD_HAS_PARENT(msp->common.parent))) { 2324 return (0); 2325 } 2326 2327 /* print appropriate detail */ 2328 if (options & PRINT_SHORT) { 2329 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2330 return (-1); 2331 } else { 2332 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2333 return (-1); 2334 } 2335 2336 /* 2337 * Print underlying metadevices if they are parented to us and 2338 * if the info for the underlying metadevice has not been printed. 2339 */ 2340 if (metaismeta(msp->compnamep)) { 2341 /* get the unit structure for the subdevice */ 2342 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2343 return (-1); 2344 2345 /* If info not already printed, recurse */ 2346 if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) { 2347 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2348 (options | PRINT_HEADER | PRINT_SUBDEVS), 2349 NULL, ep) != 0) { 2350 return (-1); 2351 } 2352 BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp))); 2353 } 2354 } 2355 return (0); 2356 } 2357 2358 /* 2359 * ************************************************************************** 2360 * Watermark Manipulation Functions * 2361 * ************************************************************************** 2362 */ 2363 2364 /* 2365 * FUNCTION: meta_sp_get_start() 2366 * INPUT: sp - the operating set 2367 * np - device upon which the sp is being built 2368 * OUTPUT: ep - return error pointer 2369 * RETURNS: daddr_t - -1 if error, otherwise the start block 2370 * PURPOSE: Encapsulate the determination of the start block of the 2371 * device upon which the sp is built or being built. 2372 */ 2373 static diskaddr_t 2374 meta_sp_get_start( 2375 mdsetname_t *sp, 2376 mdname_t *np, 2377 md_error_t *ep 2378 ) 2379 { 2380 daddr_t start_block; 2381 2382 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) 2383 start_block += MD_SP_START; 2384 2385 return (start_block); 2386 } 2387 2388 /* 2389 * FUNCTION: meta_sp_update_wm() 2390 * INPUT: sp - the operating set 2391 * msp - a pointer to the XDR unit structure 2392 * extlist - the extent list specifying watermarks to update 2393 * OUTPUT: ep - return error pointer 2394 * RETURNS: int - -1 if error, 0 on success 2395 * PURPOSE: steps backwards through the extent list updating 2396 * watermarks for all extents with the EXTFLG_UPDATE flag 2397 * set. Writing the watermarks guarantees consistency when 2398 * extents must be broken into pieces since the original 2399 * watermark will be the last to be updated, and will be 2400 * changed to point to a new watermark that is already 2401 * known to be consistent. If one of the writes fails, the 2402 * original watermark stays intact and none of the changes 2403 * are realized. 2404 */ 2405 static int 2406 meta_sp_update_wm( 2407 mdsetname_t *sp, 2408 md_sp_t *msp, 2409 sp_ext_node_t *extlist, 2410 md_error_t *ep 2411 ) 2412 { 2413 sp_ext_node_t *ext; 2414 sp_ext_node_t *tail; 2415 mp_watermark_t *wmp, *watermarks; 2416 xsp_offset_t *osp, *offsets; 2417 int update_count = 0; 2418 int rval = 0; 2419 md_unit_t *mdp; 2420 md_sp_update_wm_t update_params; 2421 2422 if (getenv(META_SP_DEBUG)) { 2423 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2424 meta_sp_list_dump(extlist); 2425 } 2426 2427 /* 2428 * find the last node so we can write the watermarks backwards 2429 * and count watermarks to update so we can allocate space 2430 */ 2431 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2432 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2433 update_count++; 2434 } 2435 2436 if (ext->ext_next == NULL) { 2437 tail = ext; 2438 } 2439 } 2440 ext = tail; 2441 2442 wmp = watermarks = 2443 Zalloc(update_count * sizeof (mp_watermark_t)); 2444 osp = offsets = 2445 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2446 2447 while (ext != NULL) { 2448 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2449 /* update watermark */ 2450 wmp->wm_magic = MD_SP_MAGIC; 2451 wmp->wm_version = MD_SP_VERSION; 2452 wmp->wm_type = ext->ext_type; 2453 wmp->wm_seq = ext->ext_seq; 2454 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2455 2456 /* fill in the volume name and set name */ 2457 if (ext->ext_namep != NULL) 2458 (void) strcpy(wmp->wm_mdname, 2459 ext->ext_namep->cname); 2460 else 2461 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2462 if (ext->ext_setp != NULL && 2463 ext->ext_setp->setno != MD_LOCAL_SET) 2464 (void) strcpy(wmp->wm_setname, 2465 ext->ext_setp->setname); 2466 else 2467 (void) strcpy(wmp->wm_setname, 2468 MD_SP_LOCALSETNAME); 2469 2470 /* Generate the checksum */ 2471 wmp->wm_checksum = 0; 2472 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2473 sizeof (*wmp), NULL); 2474 2475 /* record the extent offset */ 2476 *osp = ext->ext_offset; 2477 2478 /* Advance the placeholders */ 2479 osp++; wmp++; 2480 } 2481 ext = ext->ext_prev; 2482 } 2483 2484 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2485 if (mdp == NULL) { 2486 rval = -1; 2487 goto out; 2488 } 2489 2490 (void) memset(&update_params, 0, sizeof (update_params)); 2491 update_params.mnum = MD_SID(mdp); 2492 update_params.count = update_count; 2493 update_params.wmp = (uintptr_t)watermarks; 2494 update_params.osp = (uintptr_t)offsets; 2495 MD_SETDRIVERNAME(&update_params, MD_SP, 2496 MD_MIN2SET(update_params.mnum)); 2497 2498 if (metaioctl(MD_IOC_SPUPDATEWM, &update_params, 2499 &update_params.mde, msp->common.namep->cname) != 0) { 2500 (void) mdstealerror(ep, &update_params.mde); 2501 rval = -1; 2502 goto out; 2503 } 2504 2505 out: 2506 Free(watermarks); 2507 Free(offsets); 2508 2509 return (rval); 2510 } 2511 2512 /* 2513 * FUNCTION: meta_sp_clear_wm() 2514 * INPUT: sp - the operating set 2515 * msp - the unit structure for the soft partition to clear 2516 * OUTPUT: ep - return error pointer 2517 * RETURNS: int - -1 if error, 0 on success 2518 * PURPOSE: steps through the extents for a soft partition unit and 2519 * creates an extent list designed to mark all of the 2520 * watermarks for those extents as free. The extent list 2521 * is then passed to meta_sp_update_wm() to actually write 2522 * the watermarks out. 2523 */ 2524 static int 2525 meta_sp_clear_wm( 2526 mdsetname_t *sp, 2527 md_sp_t *msp, 2528 md_error_t *ep 2529 ) 2530 { 2531 sp_ext_node_t *extlist = NULL; 2532 int numexts = msp->ext.ext_len; 2533 uint_t i; 2534 int rval = 0; 2535 2536 /* for each watermark must set the flag to SP_FREE */ 2537 for (i = 0; i < numexts; i++) { 2538 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2539 2540 meta_sp_list_insert(NULL, NULL, &extlist, 2541 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2542 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2543 } 2544 2545 /* update watermarks */ 2546 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2547 2548 meta_sp_list_free(&extlist); 2549 return (rval); 2550 } 2551 2552 /* 2553 * FUNCTION: meta_sp_read_wm() 2554 * INPUT: sp - setname for component 2555 * compnp - mdname_t for component 2556 * offset - the offset of the watermark to read (sectors) 2557 * OUTPUT: wm - the watermark structure to read into 2558 * ep - return error pointer 2559 * RETURNS: int - -1 if error, 0 on success 2560 * PURPOSE: seeks out to the requested offset and reads a watermark. 2561 * It then verifies that the magic number is correct and 2562 * that the checksum is valid, returning an error if either 2563 * is wrong. 2564 */ 2565 static int 2566 meta_sp_read_wm( 2567 mdsetname_t *sp, 2568 mdname_t *compnp, 2569 mp_watermark_t *wm, 2570 sp_ext_offset_t offset, 2571 md_error_t *ep 2572 ) 2573 { 2574 md_sp_read_wm_t read_params; 2575 2576 /* 2577 * make sure block offset does not overflow 2^64 bytes and it's a 2578 * multiple of the block size. 2579 */ 2580 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2581 /* LINTED */ 2582 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2583 2584 (void) memset(wm, 0, sizeof (*wm)); 2585 2586 (void) memset(&read_params, 0, sizeof (read_params)); 2587 read_params.rdev = compnp->dev; 2588 read_params.wmp = (uintptr_t)wm; 2589 read_params.offset = offset; 2590 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2591 2592 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2593 &read_params.mde, compnp->cname) != 0) { 2594 2595 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2596 "Extent header read failed, block %llu.\n"), offset); 2597 return (mdstealerror(ep, &read_params.mde)); 2598 } 2599 2600 /* make sure magic number is correct */ 2601 if (wm->wm_magic != MD_SP_MAGIC) { 2602 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2603 "found incorrect magic number %x, expected %x.\n"), 2604 wm->wm_magic, MD_SP_MAGIC); 2605 /* 2606 * Pass NULL for the device name as we don't have 2607 * valid watermark contents. 2608 */ 2609 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2610 } 2611 2612 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2613 sizeof (*wm), NULL)) { 2614 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2615 "found incorrect checksum %x.\n"), 2616 wm->wm_checksum); 2617 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2618 } 2619 2620 return (0); 2621 } 2622 2623 /* 2624 * ************************************************************************** 2625 * Query Functions 2626 * ************************************************************************** 2627 */ 2628 2629 /* 2630 * IMPORTANT NOTE: This is a static function that assumes that 2631 * its input parameters have been checked and 2632 * have valid values that lie within acceptable 2633 * ranges. 2634 * 2635 * FUNCTION: meta_sp_enough_space() 2636 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2637 * must be > 0 2638 * desired_sp_size - the desired soft partition size in blocks; 2639 * must be > 0 2640 * extent_listpp - a reference to a reference to an extent 2641 * list that lists the extents on a device; 2642 * must be a reference to a reference to a 2643 * valid extent list 2644 * alignment - the desired data space alignment for the sp's 2645 * OUTPUT: boolean_t return value 2646 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2647 * list to create the desired soft partitions, 2648 * B_FALSE if there's not enough space 2649 * PURPOSE: determines whether there's enough free space in an extent 2650 * list to allow creation of a set of soft partitions 2651 */ 2652 static boolean_t 2653 meta_sp_enough_space( 2654 int desired_number_of_sps, 2655 blkcnt_t desired_sp_size, 2656 sp_ext_node_t **extent_listpp, 2657 sp_ext_length_t alignment 2658 ) 2659 { 2660 boolean_t enough_space; 2661 int number_of_sps; 2662 int number_of_extents_used; 2663 sp_ext_length_t desired_ext_length = desired_sp_size; 2664 2665 enough_space = B_TRUE; 2666 number_of_sps = 0; 2667 while ((enough_space == B_TRUE) && 2668 (number_of_sps < desired_number_of_sps)) { 2669 /* 2670 * Use the extent allocation algorithm implemented by 2671 * meta_sp_alloc_by_len() to test whether the free 2672 * extents in the extent list referenced by *extent_listpp 2673 * contain enough space to accomodate a soft partition 2674 * of size desired_ext_length. 2675 * 2676 * Repeat the test <desired_number_of_sps> times 2677 * or until it fails, whichever comes first, 2678 * each time allocating the extents required to 2679 * create the soft partition without actually 2680 * creating the soft partition. 2681 */ 2682 number_of_extents_used = meta_sp_alloc_by_len( 2683 TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2684 extent_listpp, &desired_ext_length, 2685 NO_OFFSET, alignment); 2686 if (number_of_extents_used == -1) { 2687 enough_space = B_FALSE; 2688 } else { 2689 number_of_sps++; 2690 } 2691 } 2692 return (enough_space); 2693 } 2694 2695 /* 2696 * IMPORTANT NOTE: This is a static function that calls other functions 2697 * that check its mdsetnamep and device_mdnamep 2698 * input parameters, but expects extent_listpp to 2699 * be a initialized to a valid address to which 2700 * it can write a reference to the extent list that 2701 * it creates. 2702 * 2703 * FUNCTION: meta_sp_get_extent_list() 2704 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2705 * for the set containing the device for 2706 * which the extents are to be listed 2707 * device_mdnamep - a reference to the mdname_t structure 2708 * for the device for which the extents 2709 * are to be listed 2710 * OUTPUT: *extent_listpp - a reference to the extent list for 2711 * the device; NULL if the function fails 2712 * *ep - the libmeta error encountered, if any 2713 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2714 * B_FALSE if not 2715 * PURPOSE: gets the extent list for a device 2716 */ 2717 static boolean_t 2718 meta_sp_get_extent_list( 2719 mdsetname_t *mdsetnamep, 2720 mdname_t *device_mdnamep, 2721 sp_ext_node_t **extent_listpp, 2722 md_error_t *ep 2723 ) 2724 { 2725 diskaddr_t device_size_in_blocks; 2726 mdnamelist_t *sp_name_listp; 2727 diskaddr_t start_block_address_in_blocks; 2728 2729 *extent_listpp = NULL; 2730 sp_name_listp = NULL; 2731 2732 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2733 device_mdnamep, ep); 2734 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2735 if (getenv(META_SP_DEBUG)) { 2736 mde_perror(ep, 2737 "meta_sp_get_extent_list:meta_sp_get_start"); 2738 } 2739 return (B_FALSE); 2740 } 2741 2742 device_size_in_blocks = metagetsize(device_mdnamep, ep); 2743 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2744 if (getenv(META_SP_DEBUG)) { 2745 mde_perror(ep, 2746 "meta_sp_get_extent_list:metagetsize"); 2747 } 2748 return (B_FALSE); 2749 } 2750 2751 /* 2752 * Sanity check: the start block will have skipped an integer 2753 * number of cylinders, C. C will usually be zero. If (C > 0), 2754 * and the disk slice happens to only be C cylinders in total 2755 * size, we'll fail this check. 2756 */ 2757 if (device_size_in_blocks <= 2758 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2759 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2760 return (B_FALSE); 2761 } 2762 2763 /* 2764 * After this point, we will have allocated resources, so any 2765 * failure returns must be through the supplied "fail" label 2766 * to properly deallocate things. 2767 */ 2768 2769 /* 2770 * Create an empty extent list that starts one watermark past 2771 * the start block of the device and ends one watermark before 2772 * the end of the device. 2773 */ 2774 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2775 extent_listpp, NO_OFFSET, 2776 (sp_ext_length_t)start_block_address_in_blocks, 2777 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2778 meta_sp_cmp_by_offset); 2779 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2780 extent_listpp, (sp_ext_offset_t)(device_size_in_blocks - 2781 MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, 2782 NO_FLAGS, meta_sp_cmp_by_offset); 2783 2784 /* 2785 * Get the list of soft partitions that are already on the 2786 * device. 2787 */ 2788 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2789 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2790 if (getenv(META_SP_DEBUG)) { 2791 mde_perror(ep, 2792 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2793 } 2794 goto fail; 2795 } 2796 2797 if (sp_name_listp != NULL) { 2798 /* 2799 * If there are soft partitions on the device, add the 2800 * extents used in them to the extent list. 2801 */ 2802 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2803 extent_listpp, ep) == -1) { 2804 if (getenv(META_SP_DEBUG)) { 2805 mde_perror(ep, "meta_sp_get_extent_list:" 2806 "meta_sp_extlist_from_namelist"); 2807 } 2808 goto fail; 2809 } 2810 metafreenamelist(sp_name_listp); 2811 } 2812 2813 /* 2814 * Add free extents to the extent list to represent 2815 * the remaining regions of free space on the 2816 * device. 2817 */ 2818 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2819 return (B_TRUE); 2820 2821 fail: 2822 if (sp_name_listp != NULL) { 2823 metafreenamelist(sp_name_listp); 2824 } 2825 2826 if (*extent_listpp != NULL) { 2827 /* 2828 * meta_sp_list_free sets *extent_listpp to NULL. 2829 */ 2830 meta_sp_list_free(extent_listpp); 2831 } 2832 return (B_FALSE); 2833 } 2834 2835 /* 2836 * IMPORTANT NOTE: This is a static function that calls other functions 2837 * that check its mdsetnamep and mddrivenamep 2838 * input parameters, but expects extent_listpp to 2839 * be a initialized to a valid address to which 2840 * it can write a reference to the extent list that 2841 * it creates. 2842 * 2843 * FUNCTION: meta_sp_get_extent_list_for_drive() 2844 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2845 * for the set containing the drive for 2846 * which the extents are to be listed 2847 * mddrivenamep - a reference to the mddrivename_t structure 2848 * for the drive for which the extents 2849 * are to be listed 2850 * OUTPUT: *extent_listpp - a reference to the extent list for 2851 * the drive; NULL if the function fails 2852 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2853 * B_FALSE if not 2854 * PURPOSE: gets the extent list for a drive when the entire drive 2855 * is to be soft partitioned 2856 */ 2857 static boolean_t 2858 meta_sp_get_extent_list_for_drive( 2859 mdsetname_t *mdsetnamep, 2860 mddrivename_t *mddrivenamep, 2861 sp_ext_node_t **extent_listpp 2862 ) 2863 { 2864 boolean_t can_use; 2865 diskaddr_t free_space; 2866 md_error_t mderror; 2867 mdvtoc_t proposed_vtoc; 2868 int repartition_options; 2869 int return_value; 2870 md_sp_t test_sp_struct; 2871 2872 can_use = B_TRUE; 2873 *extent_listpp = NULL; 2874 mderror = mdnullerror; 2875 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2876 &mderror); 2877 if (test_sp_struct.compnamep == NULL) { 2878 can_use = B_FALSE; 2879 } 2880 2881 if (can_use == B_TRUE) { 2882 mderror = mdnullerror; 2883 repartition_options = 0; 2884 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2885 MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror); 2886 if (return_value != 0) { 2887 can_use = B_FALSE; 2888 } 2889 } 2890 2891 if (can_use == B_TRUE) { 2892 mderror = mdnullerror; 2893 repartition_options = repartition_options | 2894 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2895 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2896 repartition_options, &proposed_vtoc, &mderror); 2897 if (return_value != 0) { 2898 can_use = B_FALSE; 2899 } 2900 } 2901 2902 if (can_use == B_TRUE) { 2903 free_space = proposed_vtoc.parts[MD_SLICE0].size; 2904 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2905 can_use = B_FALSE; 2906 } 2907 } 2908 2909 if (can_use == B_TRUE) { 2910 /* 2911 * Create an extent list that starts with 2912 * a reserved extent that ends at the start 2913 * of the usable space on slice zero of the 2914 * proposed VTOC, ends with an extent that 2915 * reserves space for a watermark at the end 2916 * of slice zero, and contains a single free 2917 * extent that occupies the rest of the space 2918 * on the slice. 2919 * 2920 * NOTE: 2921 * 2922 * Don't use metagetstart() or metagetsize() to 2923 * find the usable space. They query the mdname_t 2924 * structure that represents an actual device to 2925 * determine the amount of space on the device that 2926 * contains metadata and the total amount of space 2927 * on the device. Since this function creates a 2928 * proposed extent list that doesn't reflect the 2929 * state of an actual device, there's no mdname_t 2930 * structure to be queried. 2931 * 2932 * When a drive is reformatted to prepare for 2933 * soft partitioning, all of slice seven is 2934 * reserved for metadata, all of slice zero is 2935 * available for soft partitioning, and all other 2936 * slices on the drive are empty. The proposed 2937 * extent list for the drive therefore contains 2938 * only three extents: a reserved extent that ends 2939 * at the start of the usable space on slice zero, 2940 * a single free extent that occupies all the usable 2941 * space on slice zero, and an ending extent that 2942 * reserves space for a watermark at the end of 2943 * slice zero. 2944 */ 2945 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2946 extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START), 2947 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2948 meta_sp_cmp_by_offset); 2949 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2950 extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2951 MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS, 2952 meta_sp_cmp_by_offset); 2953 meta_sp_list_freefill(extent_listpp, free_space); 2954 } 2955 return (can_use); 2956 } 2957 2958 /* 2959 * FUNCTION: meta_sp_can_create_sps() 2960 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2961 * for the set containing the device for 2962 * which the extents are to be listed 2963 * mdnamep - a reference to the mdname_t of the device 2964 * on which the soft parititions are to be created 2965 * number_of_sps - the desired number of soft partitions 2966 * sp_size - the desired soft partition size 2967 * OUTPUT: boolean_t return value 2968 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 2969 * B_FALSE if not 2970 * PURPOSE: determines whether a set of soft partitions can be created 2971 * on a device 2972 */ 2973 boolean_t 2974 meta_sp_can_create_sps( 2975 mdsetname_t *mdsetnamep, 2976 mdname_t *mdnamep, 2977 int number_of_sps, 2978 blkcnt_t sp_size 2979 ) 2980 { 2981 sp_ext_node_t *extent_listp; 2982 boolean_t succeeded; 2983 md_error_t mde; 2984 2985 if ((number_of_sps > 0) && (sp_size > 0)) { 2986 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 2987 &extent_listp, &mde); 2988 } else { 2989 succeeded = B_FALSE; 2990 } 2991 2992 /* 2993 * We don't really care about an error return from the 2994 * alignment call; that will just result in passing zero, 2995 * which will be interpreted as no alignment. 2996 */ 2997 2998 if (succeeded == B_TRUE) { 2999 succeeded = meta_sp_enough_space(number_of_sps, 3000 sp_size, &extent_listp, 3001 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3002 meta_sp_list_free(&extent_listp); 3003 } 3004 return (succeeded); 3005 } 3006 3007 /* 3008 * FUNCTION: meta_sp_can_create_sps_on_drive() 3009 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3010 * for the set containing the drive for 3011 * which the extents are to be listed 3012 * mddrivenamep - a reference to the mddrivename_t of the drive 3013 * on which the soft parititions are to be created 3014 * number_of_sps - the desired number of soft partitions 3015 * sp_size - the desired soft partition size 3016 * OUTPUT: boolean_t return value 3017 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3018 * B_FALSE if not 3019 * PURPOSE: determines whether a set of soft partitions can be created 3020 * on a drive if the entire drive is soft partitioned 3021 */ 3022 boolean_t 3023 meta_sp_can_create_sps_on_drive( 3024 mdsetname_t *mdsetnamep, 3025 mddrivename_t *mddrivenamep, 3026 int number_of_sps, 3027 blkcnt_t sp_size 3028 ) 3029 { 3030 sp_ext_node_t *extent_listp; 3031 boolean_t succeeded; 3032 3033 if ((number_of_sps > 0) && (sp_size > 0)) { 3034 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3035 mddrivenamep, &extent_listp); 3036 } else { 3037 succeeded = B_FALSE; 3038 } 3039 3040 /* 3041 * We don't care about alignment on the space call because 3042 * we're specifically dealing with a drive, which will have no 3043 * inherent alignment. 3044 */ 3045 3046 if (succeeded == B_TRUE) { 3047 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3048 &extent_listp, SP_UNALIGNED); 3049 meta_sp_list_free(&extent_listp); 3050 } 3051 return (succeeded); 3052 } 3053 3054 /* 3055 * FUNCTION: meta_sp_get_free_space() 3056 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3057 * for the set containing the device for 3058 * which the free space is to be returned 3059 * mdnamep - a reference to the mdname_t of the device 3060 * for which the free space is to be returned 3061 * OUTPUT: blkcnt_t return value 3062 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3063 * PURPOSE: returns the number of blocks of free space on a device 3064 */ 3065 blkcnt_t 3066 meta_sp_get_free_space( 3067 mdsetname_t *mdsetnamep, 3068 mdname_t *mdnamep 3069 ) 3070 { 3071 sp_ext_node_t *extent_listp; 3072 sp_ext_length_t free_blocks; 3073 boolean_t succeeded; 3074 md_error_t mde; 3075 3076 extent_listp = NULL; 3077 free_blocks = 0; 3078 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3079 &extent_listp, &mde); 3080 if (succeeded == B_TRUE) { 3081 free_blocks = meta_sp_list_size(extent_listp, 3082 EXTTYP_FREE, INCLUDE_WM); 3083 meta_sp_list_free(&extent_listp); 3084 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3085 /* 3086 * Subtract a safety margin for watermarks when 3087 * computing the number of blocks available for 3088 * use. The actual number of watermarks can't 3089 * be calculated without knowing the exact numbers 3090 * and sizes of both the free extents and the soft 3091 * partitions to be created. The calculation is 3092 * highly complex and error-prone even if those 3093 * quantities are known. The approximate value 3094 * 10 * MD_SP_WMSIZE is within a few blocks of the 3095 * correct value in all practical cases. 3096 */ 3097 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3098 } else { 3099 free_blocks = 0; 3100 } 3101 } else { 3102 mdclrerror(&mde); 3103 } 3104 3105 return (free_blocks); 3106 } 3107 3108 /* 3109 * FUNCTION: meta_sp_get_free_space_on_drive() 3110 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3111 * for the set containing the drive for 3112 * which the free space is to be returned 3113 * mddrivenamep - a reference to the mddrivename_t of the drive 3114 * for which the free space is to be returned 3115 * OUTPUT: blkcnt_t return value 3116 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3117 * PURPOSE: returns the number of blocks of space usable for soft 3118 * partitions on an entire drive, if the entire drive is 3119 * soft partitioned 3120 */ 3121 blkcnt_t 3122 meta_sp_get_free_space_on_drive( 3123 mdsetname_t *mdsetnamep, 3124 mddrivename_t *mddrivenamep 3125 ) 3126 { 3127 sp_ext_node_t *extent_listp; 3128 sp_ext_length_t free_blocks; 3129 boolean_t succeeded; 3130 3131 extent_listp = NULL; 3132 free_blocks = 0; 3133 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3134 mddrivenamep, &extent_listp); 3135 if (succeeded == B_TRUE) { 3136 free_blocks = meta_sp_list_size(extent_listp, 3137 EXTTYP_FREE, INCLUDE_WM); 3138 meta_sp_list_free(&extent_listp); 3139 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3140 /* 3141 * Subtract a safety margin for watermarks when 3142 * computing the number of blocks available for 3143 * use. The actual number of watermarks can't 3144 * be calculated without knowing the exact numbers 3145 * and sizes of both the free extents and the soft 3146 * partitions to be created. The calculation is 3147 * highly complex and error-prone even if those 3148 * quantities are known. The approximate value 3149 * 10 * MD_SP_WMSIZE is within a few blocks of the 3150 * correct value in all practical cases. 3151 */ 3152 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3153 } else { 3154 free_blocks = 0; 3155 } 3156 } 3157 return (free_blocks); 3158 } 3159 3160 /* 3161 * FUNCTION: meta_sp_get_number_of_possible_sps() 3162 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3163 * for the set containing the device for 3164 * which the number of possible soft partitions 3165 * is to be returned 3166 * mdnamep - a reference to the mdname_t of the device 3167 * for which the number of possible soft partitions 3168 * is to be returned 3169 * OUTPUT: int return value 3170 * RETURNS: int - the number of soft partitions of the desired size 3171 * that can be created on the device 3172 * PURPOSE: returns the number of soft partitions of a given size 3173 * that can be created on a device 3174 */ 3175 int 3176 meta_sp_get_number_of_possible_sps( 3177 mdsetname_t *mdsetnamep, 3178 mdname_t *mdnamep, 3179 blkcnt_t sp_size 3180 ) 3181 { 3182 sp_ext_node_t *extent_listp; 3183 int number_of_possible_sps; 3184 boolean_t succeeded; 3185 md_error_t mde; 3186 sp_ext_length_t alignment; 3187 3188 extent_listp = NULL; 3189 number_of_possible_sps = 0; 3190 if (sp_size > 0) { 3191 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3192 mdnamep, &extent_listp, &mde)) == B_FALSE) 3193 mdclrerror(&mde); 3194 } else { 3195 succeeded = B_FALSE; 3196 } 3197 3198 if (succeeded == B_TRUE) { 3199 alignment = meta_sp_get_default_alignment(mdsetnamep, 3200 mdnamep, &mde); 3201 } 3202 3203 while (succeeded == B_TRUE) { 3204 /* 3205 * Keep allocating space from the extent list 3206 * for soft partitions of the desired size until 3207 * there's not enough free space left in the list 3208 * for another soft partiition of that size. 3209 * Add one to the number of possible soft partitions 3210 * for each soft partition for which there is 3211 * enough free space left. 3212 */ 3213 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3214 sp_size, &extent_listp, alignment); 3215 if (succeeded == B_TRUE) { 3216 number_of_possible_sps++; 3217 } 3218 } 3219 if (extent_listp != NULL) { 3220 meta_sp_list_free(&extent_listp); 3221 } 3222 return (number_of_possible_sps); 3223 } 3224 3225 /* 3226 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3227 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3228 * for the set containing the drive for 3229 * which the number of possible soft partitions 3230 * is to be returned 3231 * mddrivenamep - a reference to the mddrivename_t of the drive 3232 * for which the number of possible soft partitions 3233 * is to be returned 3234 * sp_size - the size in blocks of the proposed soft partitions 3235 * OUTPUT: int return value 3236 * RETURNS: int - the number of soft partitions of the desired size 3237 * that can be created on the drive 3238 * PURPOSE: returns the number of soft partitions of a given size 3239 * that can be created on a drive, if the entire drive is 3240 * soft partitioned 3241 */ 3242 int 3243 meta_sp_get_number_of_possible_sps_on_drive( 3244 mdsetname_t *mdsetnamep, 3245 mddrivename_t *mddrivenamep, 3246 blkcnt_t sp_size 3247 ) 3248 { 3249 sp_ext_node_t *extent_listp; 3250 int number_of_possible_sps; 3251 boolean_t succeeded; 3252 3253 extent_listp = NULL; 3254 number_of_possible_sps = 0; 3255 if (sp_size > 0) { 3256 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3257 mddrivenamep, &extent_listp); 3258 } else { 3259 succeeded = B_FALSE; 3260 } 3261 while (succeeded == B_TRUE) { 3262 /* 3263 * Keep allocating space from the extent list 3264 * for soft partitions of the desired size until 3265 * there's not enough free space left in the list 3266 * for another soft partition of that size. 3267 * Add one to the number of possible soft partitions 3268 * for each soft partition for which there is 3269 * enough free space left. 3270 * 3271 * Since it's a drive, not a metadevice, make no 3272 * assumptions about alignment. 3273 */ 3274 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3275 sp_size, &extent_listp, SP_UNALIGNED); 3276 if (succeeded == B_TRUE) { 3277 number_of_possible_sps++; 3278 } 3279 } 3280 if (extent_listp != NULL) { 3281 meta_sp_list_free(&extent_listp); 3282 } 3283 return (number_of_possible_sps); 3284 } 3285 3286 /* 3287 * FUNCTION: meta_sp_get_possible_sp_size() 3288 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3289 * for the set containing the device for 3290 * which the possible soft partition size 3291 * is to be returned 3292 * mdnamep - a reference to the mdname_t of the device 3293 * for which the possible soft partition size 3294 * is to be returned 3295 * number_of_sps - the desired number of soft partitions 3296 * OUTPUT: blkcnt_t return value 3297 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3298 * PURPOSE: returns the maximum possible size of each of a given number of 3299 * soft partitions of equal size that can be created on a device 3300 */ 3301 blkcnt_t 3302 meta_sp_get_possible_sp_size( 3303 mdsetname_t *mdsetnamep, 3304 mdname_t *mdnamep, 3305 int number_of_sps 3306 ) 3307 { 3308 blkcnt_t free_blocks; 3309 blkcnt_t sp_size; 3310 boolean_t succeeded; 3311 3312 sp_size = 0; 3313 if (number_of_sps > 0) { 3314 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3315 sp_size = free_blocks / number_of_sps; 3316 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3317 number_of_sps, sp_size); 3318 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3319 /* 3320 * To compensate for space that may have been 3321 * occupied by watermarks, reduce sp_size by a 3322 * number of blocks equal to the number of soft 3323 * partitions desired, and test again to see 3324 * whether the desired number of soft partitions 3325 * can be created. 3326 */ 3327 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3328 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3329 number_of_sps, sp_size); 3330 } 3331 if (sp_size < 0) { 3332 sp_size = 0; 3333 } 3334 } 3335 return (sp_size); 3336 } 3337 3338 /* 3339 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3340 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3341 * for the set containing the drive for 3342 * which the possible soft partition size 3343 * is to be returned 3344 * mddrivenamep - a reference to the mddrivename_t of the drive 3345 * for which the possible soft partition size 3346 * is to be returned 3347 * number_of_sps - the desired number of soft partitions 3348 * OUTPUT: blkcnt_t return value 3349 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3350 * PURPOSE: returns the maximum possible size of each of a given number of 3351 * soft partitions of equal size that can be created on a drive 3352 * if the entire drive is soft partitioned 3353 */ 3354 blkcnt_t 3355 meta_sp_get_possible_sp_size_on_drive( 3356 mdsetname_t *mdsetnamep, 3357 mddrivename_t *mddrivenamep, 3358 int number_of_sps 3359 ) 3360 { 3361 blkcnt_t free_blocks; 3362 blkcnt_t sp_size; 3363 boolean_t succeeded; 3364 3365 sp_size = 0; 3366 if (number_of_sps > 0) { 3367 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3368 mddrivenamep); 3369 sp_size = free_blocks / number_of_sps; 3370 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3371 mddrivenamep, number_of_sps, sp_size); 3372 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3373 /* 3374 * To compensate for space that may have been 3375 * occupied by watermarks, reduce sp_size by a 3376 * number of blocks equal to the number of soft 3377 * partitions desired, and test again to see 3378 * whether the desired number of soft partitions 3379 * can be created. 3380 */ 3381 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3382 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3383 mddrivenamep, number_of_sps, sp_size); 3384 } 3385 if (sp_size < 0) { 3386 sp_size = 0; 3387 } 3388 } 3389 return (sp_size); 3390 } 3391 3392 /* 3393 * ************************************************************************** 3394 * Unit Structure Manipulation Functions * 3395 * ************************************************************************** 3396 */ 3397 3398 /* 3399 * FUNCTION: meta_sp_fillextarray() 3400 * INPUT: mp - the unit structure to fill 3401 * extlist - the list of extents to fill with 3402 * OUTPUT: none 3403 * RETURNS: void 3404 * PURPOSE: fills in the unit structure extent list with the extents 3405 * specified by extlist. Only extents in extlist with the 3406 * EXTFLG_UPDATE flag are changed in the unit structure, 3407 * and the index into the unit structure is the sequence 3408 * number in the extent list. After all of the nodes have 3409 * been updated the virtual offsets in the unit structure 3410 * are updated to reflect the new lengths. 3411 */ 3412 static void 3413 meta_sp_fillextarray( 3414 mp_unit_t *mp, 3415 sp_ext_node_t *extlist 3416 ) 3417 { 3418 int i; 3419 sp_ext_node_t *ext; 3420 sp_ext_offset_t curvoff = 0LL; 3421 3422 assert(mp != NULL); 3423 3424 /* go through the allocation list and fill in our unit structure */ 3425 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3426 if ((ext->ext_type == EXTTYP_ALLOC) && 3427 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3428 mp->un_ext[ext->ext_seq].un_poff = 3429 ext->ext_offset + MD_SP_WMSIZE; 3430 mp->un_ext[ext->ext_seq].un_len = 3431 ext->ext_length - MD_SP_WMSIZE; 3432 } 3433 } 3434 3435 for (i = 0; i < mp->un_numexts; i++) { 3436 assert(mp->un_ext[i].un_poff != 0); 3437 assert(mp->un_ext[i].un_len != 0); 3438 mp->un_ext[i].un_voff = curvoff; 3439 curvoff += mp->un_ext[i].un_len; 3440 } 3441 } 3442 3443 /* 3444 * FUNCTION: meta_sp_createunit() 3445 * INPUT: np - the name of the device to create a unit structure for 3446 * compnp - the name of the device the soft partition is on 3447 * extlist - the extent list to populate the new unit with 3448 * numexts - the number of extents in the extent list 3449 * len - the total size of the soft partition (sectors) 3450 * status - the initial status of the unit structure 3451 * OUTPUT: ep - return error pointer 3452 * RETURNS: mp_unit_t * - the new unit structure. 3453 * PURPOSE: allocates and fills in a new soft partition unit 3454 * structure to be passed to the soft partitioning driver 3455 * for creation. 3456 */ 3457 static mp_unit_t * 3458 meta_sp_createunit( 3459 mdname_t *np, 3460 mdname_t *compnp, 3461 sp_ext_node_t *extlist, 3462 int numexts, 3463 sp_ext_length_t len, 3464 sp_status_t status, 3465 md_error_t *ep 3466 ) 3467 { 3468 mp_unit_t *mp; 3469 uint_t ms_size; 3470 3471 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3472 (numexts * sizeof (mp->un_ext[0])); 3473 3474 mp = Zalloc(ms_size); 3475 3476 /* fill in fields in common unit structure */ 3477 mp->c.un_type = MD_METASP; 3478 mp->c.un_size = ms_size; 3479 MD_SID(mp) = meta_getminor(np->dev); 3480 mp->c.un_total_blocks = len; 3481 mp->c.un_actual_tb = len; 3482 3483 /* set up geometry */ 3484 (void) meta_sp_setgeom(np, compnp, mp, ep); 3485 3486 /* if we're building on metadevice we can't parent */ 3487 if (metaismeta(compnp)) 3488 MD_CAPAB(mp) = MD_CANT_PARENT; 3489 else 3490 MD_CAPAB(mp) = MD_CAN_PARENT; 3491 3492 /* fill soft partition-specific fields */ 3493 mp->un_dev = compnp->dev; 3494 mp->un_key = compnp->key; 3495 3496 /* mdname_t start_blk field is not 64-bit! */ 3497 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3498 mp->un_status = status; 3499 mp->un_numexts = numexts; 3500 mp->un_length = len; 3501 3502 /* fill in the extent array */ 3503 meta_sp_fillextarray(mp, extlist); 3504 3505 return (mp); 3506 } 3507 3508 /* 3509 * FUNCTION: meta_sp_updateunit() 3510 * INPUT: np - name structure for the metadevice being updated 3511 * old_un - the original unit structure that is being updated 3512 * extlist - the extent list to populate the new unit with 3513 * grow_len - the amount by which the partition is being grown 3514 * numexts - the number of extents in the extent list 3515 * ep - return error pointer 3516 * OUTPUT: none 3517 * RETURNS: mp_unit_t * - the updated unit structure 3518 * PURPOSE: allocates and fills in a new soft partition unit structure to 3519 * be passed to the soft partitioning driver for creation. The 3520 * old unit structure is first copied in, and then the updated 3521 * extents are changed in the new unit structure. This is 3522 * typically used when the size of an existing unit is changed. 3523 */ 3524 static mp_unit_t * 3525 meta_sp_updateunit( 3526 mdname_t *np, 3527 mp_unit_t *old_un, 3528 sp_ext_node_t *extlist, 3529 sp_ext_length_t grow_len, 3530 int numexts, 3531 md_error_t *ep 3532 ) 3533 { 3534 mp_unit_t *new_un; 3535 sp_ext_length_t new_len; 3536 uint_t new_size; 3537 3538 assert(old_un != NULL); 3539 assert(extlist != NULL); 3540 3541 /* allocate new unit structure and copy in old unit */ 3542 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3543 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3544 new_len = old_un->un_length + grow_len; 3545 new_un = Zalloc(new_size); 3546 bcopy(old_un, new_un, old_un->c.un_size); 3547 3548 /* update size and geometry information */ 3549 new_un->c.un_size = new_size; 3550 new_un->un_length = new_len; 3551 new_un->c.un_total_blocks = new_len; 3552 new_un->c.un_actual_tb = new_len; 3553 if (meta_adjust_geom((md_unit_t *)new_un, np, 3554 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3555 0, ep) != 0) { 3556 Free(new_un); 3557 return (NULL); 3558 } 3559 3560 /* update extent information */ 3561 new_un->un_numexts += numexts; 3562 3563 meta_sp_fillextarray(new_un, extlist); 3564 3565 return (new_un); 3566 } 3567 3568 /* 3569 * FUNCTION: meta_get_sp() 3570 * INPUT: sp - the set name for the device to get 3571 * np - the name of the device to get 3572 * OUTPUT: ep - return error pointer 3573 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3574 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3575 * for the named device. Just a wrapper for meta_get_sp_common(). 3576 */ 3577 md_sp_t * 3578 meta_get_sp( 3579 mdsetname_t *sp, 3580 mdname_t *np, 3581 md_error_t *ep 3582 ) 3583 { 3584 return (meta_get_sp_common(sp, np, 0, ep)); 3585 } 3586 3587 /* 3588 * FUNCTION: meta_get_sp_common() 3589 * INPUT: sp - the set name for the device to get 3590 * np - the name of the device to get 3591 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3592 * OUTPUT: ep - return error pointer 3593 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3594 * NULL if np is not a soft partition 3595 * PURPOSE: common routine for fetching a soft partition unit structure 3596 */ 3597 md_sp_t * 3598 meta_get_sp_common( 3599 mdsetname_t *sp, 3600 mdname_t *np, 3601 int fast, 3602 md_error_t *ep 3603 ) 3604 { 3605 mddrivename_t *dnp = np->drivenamep; 3606 char *miscname; 3607 mp_unit_t *mp; 3608 md_sp_t *msp; 3609 int i; 3610 3611 /* must have set */ 3612 assert(sp != NULL); 3613 3614 /* short circuit */ 3615 if (dnp->unitp != NULL) { 3616 if (dnp->unitp->type != MD_METASP) 3617 return (NULL); 3618 return ((md_sp_t *)dnp->unitp); 3619 } 3620 /* get miscname and unit */ 3621 if ((miscname = metagetmiscname(np, ep)) == NULL) 3622 return (NULL); 3623 3624 if (strcmp(miscname, MD_SP) != 0) { 3625 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3626 return (NULL); 3627 } 3628 3629 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3630 return (NULL); 3631 3632 assert(mp->c.un_type == MD_METASP); 3633 3634 /* allocate soft partition */ 3635 msp = Zalloc(sizeof (*msp)); 3636 3637 /* get the common information */ 3638 msp->common.namep = np; 3639 msp->common.type = mp->c.un_type; 3640 msp->common.state = mp->c.un_status; 3641 msp->common.capabilities = mp->c.un_capabilities; 3642 msp->common.parent = mp->c.un_parent; 3643 msp->common.size = mp->c.un_total_blocks; 3644 msp->common.user_flags = mp->c.un_user_flags; 3645 msp->common.revision = mp->c.un_revision; 3646 3647 /* get soft partition information */ 3648 if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3649 goto out; 3650 3651 /* 3652 * Fill in the key and the start block. Note that the start 3653 * block in the unit structure is 64 bits but the name pointer 3654 * only supports 32 bits. 3655 */ 3656 msp->compnamep->key = mp->un_key; 3657 msp->compnamep->start_blk = mp->un_start_blk; 3658 3659 /* fill in status field */ 3660 msp->status = mp->un_status; 3661 3662 /* allocate the extents */ 3663 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3664 msp->ext.ext_len = mp->un_numexts; 3665 3666 /* do the extents for this soft partition */ 3667 for (i = 0; i < mp->un_numexts; i++) { 3668 struct mp_ext *mde = &mp->un_ext[i]; 3669 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3670 3671 extp->voff = mde->un_voff; 3672 extp->poff = mde->un_poff; 3673 extp->len = mde->un_len; 3674 } 3675 3676 /* cleanup, return success */ 3677 Free(mp); 3678 dnp->unitp = (md_common_t *)msp; 3679 return (msp); 3680 3681 out: 3682 /* clean up and return error */ 3683 Free(mp); 3684 Free(msp); 3685 return (NULL); 3686 } 3687 3688 3689 /* 3690 * FUNCTION: meta_init_sp() 3691 * INPUT: spp - the set name for the new device 3692 * argc - the remaining argument count for the metainit cmdline 3693 * argv - the remainder of the unparsed command line 3694 * options - global options parsed by metainit 3695 * OUTPUT: ep - return error pointer 3696 * RETURNS: int - -1 failure, 0 success 3697 * PURPOSE: provides the command line parsing and name management overhead 3698 * for creating a new soft partition. Ultimately this calls 3699 * meta_create_sp() which does the real work of allocating space 3700 * for the new soft partition. 3701 */ 3702 int 3703 meta_init_sp( 3704 mdsetname_t **spp, 3705 int argc, 3706 char *argv[], 3707 mdcmdopts_t options, 3708 md_error_t *ep 3709 ) 3710 { 3711 char *compname = NULL; 3712 mdname_t *spcompnp = NULL; /* name of component volume */ 3713 char *devname = argv[0]; /* unit name */ 3714 mdname_t *np = NULL; /* name of soft partition */ 3715 md_sp_t *msp = NULL; 3716 int c; 3717 int old_optind; 3718 sp_ext_length_t len = 0LL; 3719 int rval = -1; 3720 uint_t seq; 3721 int oflag; 3722 int failed; 3723 mddrivename_t *dnp = NULL; 3724 sp_ext_length_t alignment = 0LL; 3725 sp_ext_node_t *extlist = NULL; 3726 3727 assert(argc > 0); 3728 3729 /* expect sp name, -p, optional -e, compname, and size parameters */ 3730 /* grab soft partition name */ 3731 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3732 goto out; 3733 3734 /* see if it exists already */ 3735 if (metagetmiscname(np, ep) != NULL) { 3736 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3737 meta_getminor(np->dev), devname); 3738 goto out; 3739 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3740 goto out; 3741 } else { 3742 mdclrerror(ep); 3743 } 3744 --argc, ++argv; 3745 3746 if (argc == 0) 3747 goto syntax; 3748 3749 /* grab -p */ 3750 if (strcmp(argv[0], "-p") != 0) 3751 goto syntax; 3752 --argc, ++argv; 3753 3754 if (argc == 0) 3755 goto syntax; 3756 3757 /* see if -e is there */ 3758 if (strcmp(argv[0], "-e") == 0) { 3759 /* use the whole disk */ 3760 options |= MDCMD_USE_WHOLE_DISK; 3761 --argc, ++argv; 3762 } 3763 3764 if (argc == 0) 3765 goto syntax; 3766 3767 /* get component name */ 3768 compname = Strdup(argv[0]); 3769 3770 if (options & MDCMD_USE_WHOLE_DISK) { 3771 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3772 goto out; 3773 } 3774 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3775 goto out; 3776 } 3777 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3778 goto out; 3779 } 3780 assert(*spp != NULL); 3781 3782 if (!(options & MDCMD_NOLOCK)) { 3783 /* grab set lock */ 3784 if (meta_lock(*spp, TRUE, ep)) 3785 goto out; 3786 3787 if (meta_check_ownership(*spp, ep) != 0) 3788 goto out; 3789 } 3790 3791 /* allocate the soft partition */ 3792 msp = Zalloc(sizeof (*msp)); 3793 3794 /* setup common */ 3795 msp->common.namep = np; 3796 msp->common.type = MD_METASP; 3797 3798 compname = spcompnp->cname; 3799 3800 assert(spcompnp->rname != NULL); 3801 --argc, ++argv; 3802 3803 if (argc == 0) { 3804 goto syntax; 3805 } 3806 3807 if (*argv[0] == '-') { 3808 /* 3809 * parse any other command line options, this includes 3810 * the recovery options -o and -b. The special thing 3811 * with these options is that the len needs to be 3812 * kept track of otherwise when the geometry of the 3813 * "device" is built it will create an invalid geometry 3814 */ 3815 old_optind = optind = 0; 3816 opterr = 0; 3817 oflag = 0; 3818 seq = 0; 3819 failed = 0; 3820 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3821 sp_ext_offset_t offset; 3822 sp_ext_length_t length; 3823 longlong_t tmp_size; 3824 3825 switch (c) { 3826 case 'A': /* data alignment */ 3827 if (meta_sp_parsesizestring(optarg, 3828 &alignment) == -1) { 3829 failed = 1; 3830 } 3831 break; 3832 case 'o': /* offset in the partition */ 3833 if (oflag == 1) { 3834 failed = 1; 3835 } else { 3836 tmp_size = atoll(optarg); 3837 if (tmp_size <= 0) { 3838 failed = 1; 3839 } else { 3840 oflag = 1; 3841 options |= MDCMD_DIRECT; 3842 3843 offset = tmp_size; 3844 } 3845 } 3846 3847 break; 3848 case 'b': /* number of blocks */ 3849 if (oflag == 0) { 3850 failed = 1; 3851 } else { 3852 tmp_size = atoll(optarg); 3853 if (tmp_size <= 0) { 3854 failed = 1; 3855 } else { 3856 oflag = 0; 3857 3858 length = tmp_size; 3859 3860 /* we have a pair of values */ 3861 meta_sp_list_insert(*spp, np, 3862 &extlist, offset, length, 3863 EXTTYP_ALLOC, seq++, 3864 EXTFLG_UPDATE, 3865 meta_sp_cmp_by_offset); 3866 len += length; 3867 } 3868 } 3869 3870 break; 3871 default: 3872 argc -= old_optind; 3873 argv += old_optind; 3874 goto options; 3875 } 3876 3877 if (failed) { 3878 argc -= old_optind; 3879 argv += old_optind; 3880 goto syntax; 3881 } 3882 3883 old_optind = optind; 3884 } 3885 argc -= optind; 3886 argv += optind; 3887 3888 /* 3889 * Must have matching pairs of -o and -b flags 3890 */ 3891 if (oflag != 0) 3892 goto syntax; 3893 3894 /* 3895 * Can't specify both layout (indicated indirectly by 3896 * len being set by thye -o/-b cases above) AND 3897 * alignment 3898 */ 3899 if ((len > 0LL) && (alignment > 0LL)) 3900 goto syntax; 3901 3902 /* 3903 * sanity check the allocation list 3904 */ 3905 if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3906 goto syntax; 3907 } 3908 3909 if (len == 0LL) { 3910 if (argc == 0) 3911 goto syntax; 3912 if (meta_sp_parsesize(argv[0], &len) == -1) 3913 goto syntax; 3914 --argc, ++argv; 3915 } 3916 3917 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3918 msp->ext.ext_val->len = len; 3919 msp->compnamep = spcompnp; 3920 3921 /* we should be at the end */ 3922 if (argc != 0) 3923 goto syntax; 3924 3925 /* create soft partition */ 3926 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3927 goto out; 3928 rval = 0; 3929 3930 /* let em know */ 3931 if (options & MDCMD_PRINT) { 3932 (void) printf(dgettext(TEXT_DOMAIN, 3933 "%s: Soft Partition is setup\n"), 3934 devname); 3935 (void) fflush(stdout); 3936 } 3937 goto out; 3938 3939 syntax: 3940 /* syntax error */ 3941 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3942 goto out; 3943 3944 options: 3945 /* options error */ 3946 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3947 goto out; 3948 3949 out: 3950 if (msp != NULL) { 3951 if (msp->ext.ext_val != NULL) { 3952 Free(msp->ext.ext_val); 3953 } 3954 Free(msp); 3955 } 3956 3957 return (rval); 3958 } 3959 3960 /* 3961 * FUNCTION: meta_free_sp() 3962 * INPUT: msp - the soft partition unit to free 3963 * OUTPUT: none 3964 * RETURNS: void 3965 * PURPOSE: provides an interface from the rest of libmeta for freeing a 3966 * soft partition unit 3967 */ 3968 void 3969 meta_free_sp(md_sp_t *msp) 3970 { 3971 Free(msp); 3972 } 3973 3974 /* 3975 * FUNCTION: meta_sp_issp() 3976 * INPUT: sp - the set name to check 3977 * np - the name to check 3978 * OUTPUT: ep - return error pointer 3979 * RETURNS: int - 0 means sp,np is a soft partition 3980 * 1 means sp,np is not a soft partition 3981 * PURPOSE: determines whether the given device is a soft partition 3982 * device. This is called by other metadevice check routines. 3983 */ 3984 int 3985 meta_sp_issp( 3986 mdsetname_t *sp, 3987 mdname_t *np, 3988 md_error_t *ep 3989 ) 3990 { 3991 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 3992 return (1); 3993 3994 return (0); 3995 } 3996 3997 /* 3998 * FUNCTION: meta_check_sp() 3999 * INPUT: sp - the set name to check 4000 * msp - the unit structure to check 4001 * options - creation options 4002 * OUTPUT: repart_options - options to be passed to 4003 * meta_repartition_drive() 4004 * ep - return error pointer 4005 * RETURNS: int - 0 ok to create on this component 4006 * -1 error or not ok to create on this component 4007 * PURPOSE: Checks to determine whether the rules for creation of 4008 * soft partitions allow creation of a soft partition on 4009 * the device described by the mdname_t structure referred 4010 * to by msp->compnamep. 4011 * 4012 * NOTE: Does NOT check to determine whether the extents 4013 * described in the md_sp_t structure referred to by 4014 * msp will fit on the device described by the mdname_t 4015 * structure located at msp->compnamep. 4016 */ 4017 static int 4018 meta_check_sp( 4019 mdsetname_t *sp, 4020 md_sp_t *msp, 4021 mdcmdopts_t options, 4022 int *repart_options, 4023 md_error_t *ep 4024 ) 4025 { 4026 md_common_t *mdp; 4027 mdname_t *compnp = msp->compnamep; 4028 uint_t slice; 4029 mddrivename_t *dnp; 4030 mdname_t *slicenp; 4031 mdvtoc_t *vtocp; 4032 4033 /* make sure it is in the set */ 4034 if (meta_check_inset(sp, compnp, ep) != 0) 4035 return (-1); 4036 4037 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4038 uint_t rep_slice; 4039 4040 /* 4041 * check to make sure we can partition this drive. 4042 * we cannot continue if any of the following are 4043 * true: 4044 * The drive is a metadevice. 4045 * The drive contains a mounted slice. 4046 * The drive contains a slice being swapped to. 4047 * The drive contains slices which are part of other 4048 * metadevices. 4049 * The drive contains a metadb. 4050 */ 4051 if (metaismeta(compnp)) 4052 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4053 compnp->cname)); 4054 4055 assert(compnp->drivenamep != NULL); 4056 4057 /* 4058 * ensure that we have slice 0 since the disk will be 4059 * repartitioned in the USE_WHOLE_DISK case. this check 4060 * is redundant unless the user incorrectly specifies a 4061 * a fully qualified drive AND slice name (i.e., 4062 * /dev/dsk/cXtXdXsX), which will be incorrectly 4063 * recognized as a drive name by the metaname code. 4064 */ 4065 4066 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4067 return (-1); 4068 if (slice != MD_SLICE0) 4069 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4070 4071 dnp = compnp->drivenamep; 4072 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4073 return (-1); 4074 4075 for (slice = 0; slice < vtocp->nparts; slice++) { 4076 4077 /* only check if the slice really exists */ 4078 if (vtocp->parts[slice].size == 0) 4079 continue; 4080 4081 slicenp = metaslicename(dnp, slice, ep); 4082 if (slicenp == NULL) 4083 return (-1); 4084 4085 /* check to ensure that it is not already in use */ 4086 if (meta_check_inuse(sp, 4087 slicenp, MDCHK_INUSE, ep) != 0) { 4088 return (-1); 4089 } 4090 4091 /* 4092 * Up to this point, tests are applied to all 4093 * slices uniformly. 4094 */ 4095 4096 if (slice == rep_slice) { 4097 /* 4098 * Tests inside the body of this 4099 * conditional are applied only to 4100 * slice seven. 4101 */ 4102 if (meta_check_inmeta(sp, slicenp, 4103 options | MDCHK_ALLOW_MDDB | 4104 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4105 return (-1); 4106 4107 /* 4108 * For slice seven, a metadb is NOT an 4109 * automatic failure. It merely means 4110 * that we're not allowed to muck 4111 * about with the partitioning of that 4112 * slice. We indicate this by masking 4113 * in the MD_REPART_LEAVE_REP flag. 4114 */ 4115 if (metahasmddb(sp, slicenp, ep)) { 4116 assert(repart_options != 4117 NULL); 4118 *repart_options |= 4119 MD_REPART_LEAVE_REP; 4120 } 4121 4122 /* 4123 * Skip the remaining tests for slice 4124 * seven 4125 */ 4126 continue; 4127 } 4128 4129 /* 4130 * Tests below this point will be applied to 4131 * all slices EXCEPT for the replica slice. 4132 */ 4133 4134 4135 /* check if component is in a metadevice */ 4136 if (meta_check_inmeta(sp, slicenp, options, 0, 4137 -1, ep) != 0) 4138 return (-1); 4139 4140 /* check to see if component has a metadb */ 4141 if (metahasmddb(sp, slicenp, ep)) 4142 return (mddeverror(ep, MDE_HAS_MDDB, 4143 slicenp->dev, slicenp->cname)); 4144 } 4145 /* 4146 * This should be all of the testing necessary when 4147 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4148 * meta_check_sp() is oriented towards component 4149 * arguments instead of disks. 4150 */ 4151 goto meta_check_sp_ok; 4152 4153 } 4154 4155 /* check to ensure that it is not already in use */ 4156 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4157 return (-1); 4158 } 4159 4160 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4161 4162 /* 4163 * The component can have one or more soft partitions on it 4164 * already, but can't be part of any other type of metadevice, 4165 * so if it is used for a metadevice, but the metadevice 4166 * isn't a soft partition, return failure. 4167 */ 4168 4169 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4170 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4171 return (-1); 4172 } 4173 } else { /* handle metadevices */ 4174 /* get underlying unit & check capabilities */ 4175 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4176 return (-1); 4177 4178 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4179 (! (mdp->capabilities & MD_CAN_SP))) 4180 return (mdmderror(ep, MDE_INVAL_UNIT, 4181 meta_getminor(compnp->dev), compnp->cname)); 4182 } 4183 4184 meta_check_sp_ok: 4185 mdclrerror(ep); 4186 return (0); 4187 } 4188 4189 /* 4190 * FUNCTION: meta_create_sp() 4191 * INPUT: sp - the set name to create in 4192 * msp - the unit structure to create 4193 * oblist - an optional list of requested extents (-o/-b options) 4194 * options - creation options 4195 * alignment - data alignment 4196 * OUTPUT: ep - return error pointer 4197 * RETURNS: int - 0 success, -1 error 4198 * PURPOSE: does most of the work for creating a soft partition. If 4199 * metainit -p -e was used, first partition the drive. Then 4200 * create an extent list based on the existing soft partitions 4201 * and assume all space not used by them is free. Storage for 4202 * the new soft partition is allocated from the free extents 4203 * based on the length specified on the command line or the 4204 * oblist passed in. The unit structure is then committed and 4205 * the watermarks are updated. Finally, the status is changed to 4206 * Okay and the process is complete. 4207 */ 4208 static int 4209 meta_create_sp( 4210 mdsetname_t *sp, 4211 md_sp_t *msp, 4212 sp_ext_node_t *oblist, 4213 mdcmdopts_t options, 4214 sp_ext_length_t alignment, 4215 md_error_t *ep 4216 ) 4217 { 4218 mdname_t *np = msp->common.namep; 4219 mdname_t *compnp = msp->compnamep; 4220 mp_unit_t *mp = NULL; 4221 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4222 md_set_params_t set_params; 4223 int rval = -1; 4224 diskaddr_t comp_size; 4225 diskaddr_t sp_start; 4226 sp_ext_node_t *extlist = NULL; 4227 int numexts = 0; /* number of extents */ 4228 int count = 0; 4229 int committed = 0; 4230 int repart_options = MD_REPART_FORCE; 4231 int create_flag = MD_CRO_32BIT; 4232 4233 md_set_desc *sd; 4234 mm_unit_t *mm; 4235 md_set_mmown_params_t *ownpar = NULL; 4236 int comp_is_mirror = 0; 4237 4238 /* validate soft partition */ 4239 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4240 return (-1); 4241 4242 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4243 if ((options & MDCMD_DOIT) != 0) { 4244 if (meta_repartition_drive(sp, 4245 compnp->drivenamep, 4246 repart_options, 4247 NULL, /* Don't return the VTOC */ 4248 ep) != 0) 4249 4250 return (-1); 4251 } else { 4252 /* 4253 * If -n and -e are both specified, it doesn't make 4254 * sense to continue without actually partitioning 4255 * the drive. 4256 */ 4257 return (0); 4258 } 4259 } 4260 4261 /* populate the start_blk field of the component name */ 4262 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4263 MD_DISKADDR_ERROR) { 4264 rval = -1; 4265 goto out; 4266 } 4267 4268 if (options & MDCMD_DOIT) { 4269 /* store name in namespace */ 4270 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4271 rval = -1; 4272 goto out; 4273 } 4274 } 4275 4276 /* 4277 * Get a list of the soft partitions that currently reside on 4278 * the component. We should ALWAYS force reload the cache, 4279 * because if this is a single creation, there will not BE a 4280 * cached list, and if we're using the md.tab, we must rebuild 4281 * the list because it won't contain the previous (if any) 4282 * soft partition. 4283 */ 4284 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4285 if (count < 0) { 4286 /* error occured */ 4287 rval = -1; 4288 goto out; 4289 } 4290 4291 /* 4292 * get the size of the underlying device. if the size is smaller 4293 * than or equal to the watermark size, we know there isn't 4294 * enough space. 4295 */ 4296 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4297 rval = -1; 4298 goto out; 4299 } else if (comp_size <= MD_SP_WMSIZE) { 4300 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4301 rval = -1; 4302 goto out; 4303 } 4304 /* 4305 * seed extlist with reserved space at the beginning of the volume and 4306 * enough space for the end watermark. The end watermark always gets 4307 * updated, but if the underlying device changes size it may not be 4308 * pointed to until the extent before it is updated. Since the 4309 * end of the reserved space is where the first watermark starts, 4310 * the reserved extent should never be marked for updating. 4311 */ 4312 4313 meta_sp_list_insert(NULL, NULL, &extlist, 4314 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4315 meta_sp_list_insert(NULL, NULL, &extlist, 4316 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4317 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4318 4319 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4320 rval = -1; 4321 goto out; 4322 } 4323 4324 metafreenamelist(spnlp); 4325 4326 if (getenv(META_SP_DEBUG)) { 4327 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4328 meta_sp_list_dump(extlist); 4329 } 4330 4331 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4332 4333 /* get extent list from -o/-b options or from free space */ 4334 if (options & MDCMD_DIRECT) { 4335 if (getenv(META_SP_DEBUG)) { 4336 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4337 meta_sp_list_dump(oblist); 4338 } 4339 4340 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4341 if (numexts == -1) { 4342 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4343 rval = -1; 4344 goto out; 4345 } 4346 } else { 4347 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4348 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4349 meta_sp_get_default_alignment(sp, compnp, ep)); 4350 if (numexts == -1) { 4351 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4352 rval = -1; 4353 goto out; 4354 } 4355 } 4356 4357 assert(extlist != NULL); 4358 4359 /* create soft partition */ 4360 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4361 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4362 4363 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4364 4365 /* if we're not doing anything (metainit -n), return success */ 4366 if (! (options & MDCMD_DOIT)) { 4367 rval = 0; /* success */ 4368 goto out; 4369 } 4370 4371 (void) memset(&set_params, 0, sizeof (set_params)); 4372 4373 if (create_flag == MD_CRO_64BIT) { 4374 mp->c.un_revision |= MD_64BIT_META_DEV; 4375 set_params.options = MD_CRO_64BIT; 4376 } else { 4377 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4378 set_params.options = MD_CRO_32BIT; 4379 } 4380 4381 if (getenv(META_SP_DEBUG)) { 4382 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4383 meta_sp_printunit(mp); 4384 } 4385 4386 /* 4387 * Check to see if we're trying to create a partition on a mirror. If so 4388 * we may have to enforce an ownership change before writing the 4389 * watermark out. 4390 */ 4391 if (metaismeta(compnp)) { 4392 char *miscname; 4393 4394 miscname = metagetmiscname(compnp, ep); 4395 if (miscname != NULL) 4396 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4397 else 4398 comp_is_mirror = 0; 4399 } else { 4400 comp_is_mirror = 0; 4401 } 4402 4403 /* 4404 * For a multi-node environment we have to ensure that the master 4405 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4406 * If the master does not own the device we will deadlock as the 4407 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4408 * ownership change that will block as the MD_IOCSET is still in 4409 * progress. To close this window we force an owner change to occur 4410 * before issuing the MD_IOCSET. We cannot simply open the device and 4411 * write to it as this will only work for the first soft-partition 4412 * creation. 4413 */ 4414 4415 if (comp_is_mirror && !metaislocalset(sp)) { 4416 4417 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4418 rval = -1; 4419 goto out; 4420 } 4421 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4422 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 4423 if (mm == NULL) { 4424 rval = -1; 4425 goto out; 4426 } else { 4427 rval = meta_mn_change_owner(&ownpar, sp->setno, 4428 meta_getminor(compnp->dev), 4429 sd->sd_mn_mynode->nd_nodeid, 4430 MD_MN_MM_PREVENT_CHANGE | 4431 MD_MN_MM_SPAWN_THREAD); 4432 if (rval == -1) 4433 goto out; 4434 } 4435 } 4436 } 4437 4438 set_params.mnum = MD_SID(mp); 4439 set_params.size = mp->c.un_size; 4440 set_params.mdp = (uintptr_t)mp; 4441 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4442 4443 /* first phase of commit. */ 4444 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4445 np->cname) != 0) { 4446 (void) mdstealerror(ep, &set_params.mde); 4447 rval = -1; 4448 goto out; 4449 } 4450 4451 /* we've successfully committed the record */ 4452 committed = 1; 4453 4454 /* write watermarks */ 4455 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4456 rval = -1; 4457 goto out; 4458 } 4459 4460 /* 4461 * Allow mirror ownership to change. If we don't succeed in this 4462 * ioctl it isn't fatal, but the cluster will probably hang fairly 4463 * soon as the mirror owner won't change. However, we have 4464 * successfully written the watermarks out to the device so the 4465 * softpart creation has succeeded 4466 */ 4467 if (ownpar) { 4468 (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum, 4469 ownpar->d.owner, 4470 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 4471 } 4472 4473 /* second phase of commit, set status to MD_SP_OK */ 4474 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4475 rval = -1; 4476 goto out; 4477 } 4478 rval = 0; 4479 out: 4480 Free(mp); 4481 if (ownpar) 4482 Free(ownpar); 4483 4484 if (extlist != NULL) 4485 meta_sp_list_free(&extlist); 4486 4487 if (rval != 0 && keynlp != NULL && committed != 1) 4488 (void) del_key_names(sp, keynlp, NULL); 4489 4490 metafreenamelist(keynlp); 4491 4492 return (rval); 4493 } 4494 4495 /* 4496 * ************************************************************************** 4497 * Reset (metaclear) Functions * 4498 * ************************************************************************** 4499 */ 4500 4501 /* 4502 * FUNCTION: meta_sp_reset_common() 4503 * INPUT: sp - the set name of the device to reset 4504 * np - the name of the device to reset 4505 * msp - the unit structure to reset 4506 * options - metaclear options 4507 * OUTPUT: ep - return error pointer 4508 * RETURNS: int - 0 success, -1 error 4509 * PURPOSE: "resets", or more accurately deletes, the soft partition 4510 * specified. First the state is set to "deleting" and then the 4511 * watermarks are all cleared out. Once the watermarks have been 4512 * updated, the unit structure is deleted from the metadb. 4513 */ 4514 static int 4515 meta_sp_reset_common( 4516 mdsetname_t *sp, 4517 mdname_t *np, 4518 md_sp_t *msp, 4519 md_sp_reset_t reset_params, 4520 mdcmdopts_t options, 4521 md_error_t *ep 4522 ) 4523 { 4524 char *miscname; 4525 int rval = -1; 4526 int is_open = 0; 4527 4528 /* make sure that nobody owns us */ 4529 if (MD_HAS_PARENT(msp->common.parent)) 4530 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4531 np->cname)); 4532 4533 /* make sure that the soft partition isn't open */ 4534 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4535 return (-1); 4536 else if (is_open) 4537 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4538 np->cname)); 4539 4540 /* get miscname */ 4541 if ((miscname = metagetmiscname(np, ep)) == NULL) 4542 return (-1); 4543 4544 /* fill in reset params */ 4545 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4546 reset_params.mnum = meta_getminor(np->dev); 4547 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4548 4549 /* 4550 * clear soft partition - phase one. 4551 * place the soft partition into the "delete pending" state. 4552 */ 4553 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4554 return (-1); 4555 4556 /* 4557 * Now clear the watermarks. If the force flag is specified, 4558 * ignore any errors writing the watermarks and delete the unit 4559 * structure anyway. An error may leave the on-disk format in a 4560 * corrupt state. If force is not specified and we fail here, 4561 * the soft partition will remain in the "delete pending" state. 4562 */ 4563 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4564 ((options & MDCMD_FORCE) == 0)) 4565 goto out; 4566 4567 /* 4568 * clear soft partition - phase two. 4569 * the driver removes the soft partition from the metadb and 4570 * zeros out incore version. 4571 */ 4572 if (metaioctl(MD_IOCRESET, &reset_params, 4573 &reset_params.mde, np->cname) != 0) { 4574 (void) mdstealerror(ep, &reset_params.mde); 4575 goto out; 4576 } 4577 4578 /* 4579 * Wait for the /dev to be cleaned up. Ignore the return 4580 * value since there's not much we can do. 4581 */ 4582 (void) meta_update_devtree(meta_getminor(np->dev)); 4583 4584 rval = 0; /* success */ 4585 4586 if (options & MDCMD_PRINT) { 4587 (void) printf(dgettext(TEXT_DOMAIN, 4588 "%s: Soft Partition is cleared\n"), 4589 np->cname); 4590 (void) fflush(stdout); 4591 } 4592 4593 /* 4594 * if told to recurse and on a metadevice, then attempt to 4595 * clear the subdevices. Indicate failure if the clear fails. 4596 */ 4597 if ((options & MDCMD_RECURSE) && 4598 (metaismeta(msp->compnamep)) && 4599 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4600 rval = -1; 4601 4602 out: 4603 meta_invalidate_name(np); 4604 return (rval); 4605 } 4606 4607 /* 4608 * FUNCTION: meta_sp_reset() 4609 * INPUT: sp - the set name of the device to reset 4610 * np - the name of the device to reset 4611 * options - metaclear options 4612 * OUTPUT: ep - return error pointer 4613 * RETURNS: int - 0 success, -1 error 4614 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4615 * soft partition. If np is NULL, then soft partitions are 4616 * all deleted at the current level and then recursively deleted. 4617 * Otherwise, if a name is specified either directly or as a 4618 * result of a recursive operation, it deletes only that name. 4619 * Since something sitting under a soft partition may be parented 4620 * to it, we have to reparent that other device to another soft 4621 * partition on the same component if we're deleting the one it's 4622 * parented to. 4623 */ 4624 int 4625 meta_sp_reset( 4626 mdsetname_t *sp, 4627 mdname_t *np, 4628 mdcmdopts_t options, 4629 md_error_t *ep 4630 ) 4631 { 4632 md_sp_t *msp; 4633 int rval = -1; 4634 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4635 md_sp_reset_t reset_params; 4636 int num_sp; 4637 4638 assert(sp != NULL); 4639 4640 /* reset/delete all soft paritions */ 4641 if (np == NULL) { 4642 /* 4643 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4644 * is incorrect for soft partitions. We want to clear 4645 * all soft partitions at a particular level in the 4646 * metadevice stack before moving to the next level. 4647 * Thus, we clear MDCMD_RECURSE from the options. 4648 */ 4649 options &= ~MDCMD_RECURSE; 4650 4651 /* for each soft partition */ 4652 rval = 0; 4653 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4654 rval = -1; 4655 4656 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4657 np = nlp->namep; 4658 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4659 rval = -1; 4660 break; 4661 } 4662 /* 4663 * meta_reset_all calls us twice to get soft 4664 * partitions at the top and bottom of the stack. 4665 * thus, if we have a parent, we'll get deleted 4666 * on the next call. 4667 */ 4668 if (MD_HAS_PARENT(msp->common.parent)) 4669 continue; 4670 /* 4671 * If this is a multi-node set, we send a series 4672 * of individual metaclear commands. 4673 */ 4674 if (meta_is_mn_set(sp, ep)) { 4675 if (meta_mn_send_metaclear_command(sp, 4676 np->cname, options, 0, ep) != 0) { 4677 rval = -1; 4678 break; 4679 } 4680 } else { 4681 if (meta_sp_reset(sp, np, options, ep) != 0) { 4682 rval = -1; 4683 break; 4684 } 4685 } 4686 } 4687 /* cleanup return status */ 4688 metafreenamelist(spnlp); 4689 return (rval); 4690 } 4691 4692 /* check the name */ 4693 if (metachkmeta(np, ep) != 0) 4694 return (-1); 4695 4696 /* get the unit structure */ 4697 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4698 return (-1); 4699 4700 /* clear out reset parameters */ 4701 (void) memset(&reset_params, 0, sizeof (reset_params)); 4702 4703 /* if our child is a metadevice, we need to deparent/reparent it */ 4704 if (metaismeta(msp->compnamep)) { 4705 /* get sp's on this component */ 4706 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4707 &spnlp, 1, ep)) <= 0) 4708 /* no sp's on this device. error! */ 4709 return (-1); 4710 else if (num_sp == 1) 4711 /* last sp on this device, so we deparent */ 4712 reset_params.new_parent = MD_NO_PARENT; 4713 else { 4714 /* have to reparent this metadevice */ 4715 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4716 if (meta_getminor(nlp->namep->dev) == 4717 meta_getminor(np->dev)) 4718 continue; 4719 /* 4720 * this isn't the softpart we are deleting, 4721 * so use this device as the new parent. 4722 */ 4723 reset_params.new_parent = 4724 meta_getminor(nlp->namep->dev); 4725 break; 4726 } 4727 } 4728 metafreenamelist(spnlp); 4729 } 4730 4731 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4732 return (-1); 4733 4734 return (0); 4735 } 4736 4737 /* 4738 * FUNCTION: meta_sp_reset_component() 4739 * INPUT: sp - the set name of the device to reset 4740 * name - the string name of the device to reset 4741 * options - metaclear options 4742 * OUTPUT: ep - return error pointer 4743 * RETURNS: int - 0 success, -1 error 4744 * PURPOSE: provides the ability to delete all soft partitions on a 4745 * specified device (metaclear -p). It first gets all of the 4746 * soft partitions on the component and then deletes each one 4747 * individually. 4748 */ 4749 int 4750 meta_sp_reset_component( 4751 mdsetname_t *sp, 4752 char *name, 4753 mdcmdopts_t options, 4754 md_error_t *ep 4755 ) 4756 { 4757 mdname_t *compnp, *np; 4758 mdnamelist_t *spnlp = NULL; 4759 mdnamelist_t *nlp = NULL; 4760 md_sp_t *msp; 4761 int count; 4762 md_sp_reset_t reset_params; 4763 4764 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4765 return (-1); 4766 4767 /* If we're starting out with no soft partitions, it's an error */ 4768 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4769 if (count == 0) 4770 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4771 else if (count < 0) 4772 return (-1); 4773 4774 /* 4775 * clear all soft partitions on this component. 4776 * NOTE: we reparent underlying metadevices as we go so that 4777 * things stay sane. Also, if we encounter an error, we stop 4778 * and go no further in case recovery might be needed. 4779 */ 4780 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4781 /* clear out reset parameters */ 4782 (void) memset(&reset_params, 0, sizeof (reset_params)); 4783 4784 /* check the name */ 4785 np = nlp->namep; 4786 4787 if (metachkmeta(np, ep) != 0) { 4788 metafreenamelist(spnlp); 4789 return (-1); 4790 } 4791 4792 /* get the unit structure */ 4793 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4794 metafreenamelist(spnlp); 4795 return (-1); 4796 } 4797 4798 /* have to deparent/reparent metadevices */ 4799 if (metaismeta(compnp)) { 4800 if (nlp->next == NULL) 4801 reset_params.new_parent = MD_NO_PARENT; 4802 else 4803 reset_params.new_parent = 4804 meta_getminor(spnlp->next->namep->dev); 4805 } 4806 4807 /* clear soft partition */ 4808 if (meta_sp_reset_common(sp, np, msp, reset_params, 4809 options, ep) < 0) { 4810 metafreenamelist(spnlp); 4811 return (-1); 4812 } 4813 } 4814 metafreenamelist(spnlp); 4815 return (0); 4816 } 4817 4818 /* 4819 * ************************************************************************** 4820 * Grow (metattach) Functions * 4821 * ************************************************************************** 4822 */ 4823 4824 /* 4825 * FUNCTION: meta_sp_attach() 4826 * INPUT: sp - the set name of the device to attach to 4827 * np - the name of the device to attach to 4828 * addsize - the unparsed string holding the amount of space to add 4829 * options - metattach options 4830 * alignment - data alignment 4831 * OUTPUT: ep - return error pointer 4832 * RETURNS: int - 0 success, -1 error 4833 * PURPOSE: grows a soft partition by reading in the existing unit 4834 * structure and setting its state to Growing, allocating more 4835 * space (similar to meta_create_sp()), updating the watermarks, 4836 * and then writing out the new unit structure in the Okay state. 4837 */ 4838 int 4839 meta_sp_attach( 4840 mdsetname_t *sp, 4841 mdname_t *np, 4842 char *addsize, 4843 mdcmdopts_t options, 4844 sp_ext_length_t alignment, 4845 md_error_t *ep 4846 ) 4847 { 4848 md_grow_params_t grow_params; 4849 sp_ext_length_t grow_len; /* amount to grow */ 4850 mp_unit_t *mp, *new_un; 4851 mdname_t *compnp = NULL; 4852 4853 sp_ext_node_t *extlist = NULL; 4854 int numexts; 4855 mdnamelist_t *spnlp = NULL; 4856 int count; 4857 md_sp_t *msp; 4858 daddr_t start_block; 4859 4860 /* should have the same set */ 4861 assert(sp != NULL); 4862 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4863 4864 /* check name */ 4865 if (metachkmeta(np, ep) != 0) 4866 return (-1); 4867 4868 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4869 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4870 } 4871 4872 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4873 return (-1); 4874 4875 /* make sure we don't have a parent */ 4876 if (MD_HAS_PARENT(mp->c.un_parent)) { 4877 Free(mp); 4878 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4879 } 4880 4881 if (getenv(META_SP_DEBUG)) { 4882 meta_sp_debug("meta_sp_attach: Unit structure before new " 4883 "space:\n"); 4884 meta_sp_printunit(mp); 4885 } 4886 4887 /* 4888 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4889 * If this was not the case we would suffer the following 4890 * assertion failure: 4891 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4892 * file meta_check.x, line 315 4893 * I guess this is because we have not "seen" this drive before 4894 * and hence hit the failure - this is of course the attach routine 4895 */ 4896 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4897 Free(mp); 4898 return (-1); 4899 } 4900 4901 /* metakeyname does not fill in the key. */ 4902 compnp->key = mp->un_key; 4903 4904 /* work out the space on the component that we are dealing with */ 4905 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4906 4907 /* 4908 * see if the component has been soft partitioned yet, or if an 4909 * error occurred. 4910 */ 4911 if (count == 0) { 4912 Free(mp); 4913 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4914 } else if (count < 0) { 4915 Free(mp); 4916 return (-1); 4917 } 4918 4919 /* 4920 * seed extlist with reserved space at the beginning of the volume and 4921 * enough space for the end watermark. The end watermark always gets 4922 * updated, but if the underlying device changes size it may not be 4923 * pointed to until the extent before it is updated. Since the 4924 * end of the reserved space is where the first watermark starts, 4925 * the reserved extent should never be marked for updating. 4926 */ 4927 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4928 MD_DISKADDR_ERROR) { 4929 Free(mp); 4930 return (-1); 4931 } 4932 4933 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4934 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4935 meta_sp_list_insert(NULL, NULL, &extlist, 4936 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4937 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4938 4939 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4940 Free(mp); 4941 return (-1); 4942 } 4943 4944 metafreenamelist(spnlp); 4945 4946 if (getenv(META_SP_DEBUG)) { 4947 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4948 meta_sp_list_dump(extlist); 4949 } 4950 4951 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4952 4953 assert(mp->un_numexts >= 1); 4954 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4955 mp->un_ext[mp->un_numexts - 1].un_poff, 4956 (alignment > 0) ? alignment : 4957 meta_sp_get_default_alignment(sp, compnp, ep)); 4958 4959 if (numexts == -1) { 4960 Free(mp); 4961 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4962 } 4963 4964 /* allocate new unit structure and copy in old unit */ 4965 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4966 grow_len, numexts, ep)) == NULL) { 4967 Free(mp); 4968 return (-1); 4969 } 4970 Free(mp); 4971 4972 /* If running in dryrun mode (-n option), we're done here */ 4973 if ((options & MDCMD_DOIT) == 0) { 4974 if (options & MDCMD_PRINT) { 4975 (void) printf(dgettext(TEXT_DOMAIN, 4976 "%s: Soft Partition would grow\n"), 4977 np->cname); 4978 (void) fflush(stdout); 4979 } 4980 return (0); 4981 } 4982 4983 if (getenv(META_SP_DEBUG)) { 4984 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 4985 meta_sp_printunit(new_un); 4986 } 4987 4988 assert(new_un != NULL); 4989 4990 (void) memset(&grow_params, 0, sizeof (grow_params)); 4991 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 4992 grow_params.options = MD_CRO_64BIT; 4993 new_un->c.un_revision |= MD_64BIT_META_DEV; 4994 } else { 4995 grow_params.options = MD_CRO_32BIT; 4996 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 4997 } 4998 grow_params.mnum = MD_SID(new_un); 4999 grow_params.size = new_un->c.un_size; 5000 grow_params.mdp = (uintptr_t)new_un; 5001 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5002 5003 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5004 np->cname) != 0) { 5005 (void) mdstealerror(ep, &grow_params.mde); 5006 return (-1); 5007 } 5008 5009 /* update all watermarks */ 5010 5011 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5012 return (-1); 5013 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5014 return (-1); 5015 5016 5017 /* second phase of commit, set status to MD_SP_OK */ 5018 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5019 return (-1); 5020 5021 meta_invalidate_name(np); 5022 5023 if (options & MDCMD_PRINT) { 5024 (void) printf(dgettext(TEXT_DOMAIN, 5025 "%s: Soft Partition has been grown\n"), 5026 np->cname); 5027 (void) fflush(stdout); 5028 } 5029 5030 return (0); 5031 } 5032 5033 /* 5034 * ************************************************************************** 5035 * Recovery (metarecover) Functions * 5036 * ************************************************************************** 5037 */ 5038 5039 /* 5040 * FUNCTION: meta_recover_sp() 5041 * INPUT: sp - the name of the set we are recovering on 5042 * compnp - name pointer for device we are recovering on 5043 * argc - argument count 5044 * argv - left over arguments not parsed by metarecover command 5045 * options - metarecover options 5046 * OUTPUT: ep - return error pointer 5047 * RETURNS: int - 0 - success, -1 - error 5048 * PURPOSE: parse soft partitioning-specific metarecover options and 5049 * dispatch to the appropriate function to handle recovery. 5050 */ 5051 int 5052 meta_recover_sp( 5053 mdsetname_t *sp, 5054 mdname_t *compnp, 5055 int argc, 5056 char *argv[], 5057 mdcmdopts_t options, 5058 md_error_t *ep 5059 ) 5060 { 5061 md_set_desc *sd; 5062 5063 if (argc > 1) { 5064 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5065 argc, argv); 5066 return (-1); 5067 } 5068 5069 /* 5070 * For a MN set, this operation must be performed on the master 5071 * as it is responsible for maintaining the watermarks 5072 */ 5073 if (!metaislocalset(sp)) { 5074 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5075 return (-1); 5076 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5077 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5078 sd->sd_mn_master_nodenm, NULL, NULL); 5079 return (-1); 5080 } 5081 } 5082 if (argc == 0) { 5083 /* 5084 * if no additional arguments are passed, metarecover should 5085 * validate both on-disk and metadb structures as well as 5086 * checking that both are consistent with each other 5087 */ 5088 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5089 return (-1); 5090 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5091 return (-1); 5092 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5093 return (-1); 5094 } else if (strcmp(argv[0], "-d") == 0) { 5095 /* 5096 * Ensure that there is no existing valid record for this 5097 * soft-partition. If there is we have nothing to do. 5098 */ 5099 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5100 return (-1); 5101 /* validate and recover from on-disk structures */ 5102 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5103 return (-1); 5104 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5105 return (-1); 5106 } else if (strcmp(argv[0], "-m") == 0) { 5107 /* validate and recover from metadb structures */ 5108 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5109 return (-1); 5110 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5111 return (-1); 5112 } else { 5113 /* syntax error */ 5114 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5115 argc, argv); 5116 return (-1); 5117 } 5118 5119 return (0); 5120 } 5121 5122 /* 5123 * FUNCTION: meta_sp_display_exthdr() 5124 * INPUT: none 5125 * OUTPUT: none 5126 * RETURNS: void 5127 * PURPOSE: print header line for sp_ext_node_t information. to be used 5128 * in conjunction with meta_sp_display_ext(). 5129 */ 5130 static void 5131 meta_sp_display_exthdr(void) 5132 { 5133 (void) printf("%20s %5s %7s %20s %20s\n", 5134 dgettext(TEXT_DOMAIN, "Name"), 5135 dgettext(TEXT_DOMAIN, "Seq#"), 5136 dgettext(TEXT_DOMAIN, "Type"), 5137 dgettext(TEXT_DOMAIN, "Offset"), 5138 dgettext(TEXT_DOMAIN, "Length")); 5139 } 5140 5141 5142 /* 5143 * FUNCTION: meta_sp_display_ext() 5144 * INPUT: ext - extent to display 5145 * OUTPUT: none 5146 * RETURNS: void 5147 * PURPOSE: print selected fields from sp_ext_node_t. 5148 */ 5149 static void 5150 meta_sp_display_ext(sp_ext_node_t *ext) 5151 { 5152 /* print extent information */ 5153 if (ext->ext_namep != NULL) 5154 (void) printf("%20s ", ext->ext_namep->cname); 5155 else 5156 (void) printf("%20s ", "NONE"); 5157 5158 (void) printf("%5u ", ext->ext_seq); 5159 5160 switch (ext->ext_type) { 5161 case EXTTYP_ALLOC: 5162 (void) printf("%7s ", "ALLOC"); 5163 break; 5164 case EXTTYP_FREE: 5165 (void) printf("%7s ", "FREE"); 5166 break; 5167 case EXTTYP_RESERVED: 5168 (void) printf("%7s ", "RESV"); 5169 break; 5170 case EXTTYP_END: 5171 (void) printf("%7s ", "END"); 5172 break; 5173 default: 5174 (void) printf("%7s ", "INVLD"); 5175 break; 5176 } 5177 5178 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5179 } 5180 5181 5182 /* 5183 * FUNCTION: meta_sp_checkseq() 5184 * INPUT: extlist - list of extents to be checked 5185 * OUTPUT: none 5186 * RETURNS: int - 0 - success, -1 - error 5187 * PURPOSE: check soft partition sequence numbers. this function assumes 5188 * that a list of extents representing 1 or more soft partitions 5189 * is passed in sorted in sequence number order. within a 5190 * single soft partition, there may not be any missing or 5191 * duplicate sequence numbers. 5192 */ 5193 static int 5194 meta_sp_checkseq(sp_ext_node_t *extlist) 5195 { 5196 sp_ext_node_t *ext; 5197 5198 assert(extlist != NULL); 5199 5200 for (ext = extlist; 5201 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5202 ext = ext->ext_next) { 5203 if (ext->ext_next->ext_namep != NULL && 5204 strcmp(ext->ext_next->ext_namep->cname, 5205 ext->ext_namep->cname) != 0) 5206 continue; 5207 5208 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5209 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5210 "%s: sequence numbers are " 5211 "incorrect: %d should be %d\n"), 5212 ext->ext_next->ext_namep->cname, 5213 ext->ext_next->ext_seq, ext->ext_seq + 1); 5214 return (-1); 5215 } 5216 } 5217 return (0); 5218 } 5219 5220 5221 /* 5222 * FUNCTION: meta_sp_resolve_name_conflict() 5223 * INPUT: sp - name of set we're are recovering in. 5224 * old_np - name pointer of soft partition we found on disk. 5225 * OUTPUT: new_np - name pointer for new soft partition name. 5226 * ep - error pointer returned. 5227 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5228 * PURPOSE: Check to see if the name of one of the soft partitions we found 5229 * on disk already exists in the metadb. If so, prompt for a new 5230 * name. In addition, we keep a static array of names that 5231 * will be recovered from this device since these names don't 5232 * exist in the configuration at this point but cannot be 5233 * recovered more than once. 5234 */ 5235 static int 5236 meta_sp_resolve_name_conflict( 5237 mdsetname_t *sp, 5238 mdname_t *old_np, 5239 mdname_t **new_np, 5240 md_error_t *ep 5241 ) 5242 { 5243 char yesno[255]; 5244 char *yes; 5245 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5246 int nunits; 5247 static int *used_names = NULL; 5248 5249 assert(old_np != NULL); 5250 5251 if (used_names == NULL) { 5252 if ((nunits = meta_get_nunits(ep)) < 0) 5253 return (-1); 5254 used_names = Zalloc(nunits * sizeof (int)); 5255 } 5256 5257 /* see if it exists already */ 5258 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5259 metagetmiscname(old_np, ep) == NULL) { 5260 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5261 return (-1); 5262 else { 5263 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5264 mdclrerror(ep); 5265 return (0); 5266 } 5267 } 5268 5269 /* name exists, ask the user for a new one */ 5270 (void) printf(dgettext(TEXT_DOMAIN, 5271 "WARNING: A soft partition named %s was found in the extent\n" 5272 "headers, but this name already exists in the metadb " 5273 "configuration.\n" 5274 "In order to continue recovery you must supply\n" 5275 "a new name for this soft partition.\n"), old_np->cname); 5276 (void) printf(dgettext(TEXT_DOMAIN, 5277 "Would you like to continue and supply a new name? (yes/no) ")); 5278 5279 (void) fflush(stdout); 5280 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5281 (strlen(yesno) == 1)) 5282 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5283 dgettext(TEXT_DOMAIN, "no")); 5284 yes = dgettext(TEXT_DOMAIN, "yes"); 5285 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5286 return (-1); 5287 } 5288 5289 (void) fflush(stdin); 5290 5291 /* get the new name */ 5292 for (;;) { 5293 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5294 "for this soft partition (dXXXX) ")); 5295 (void) fflush(stdout); 5296 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5297 (void) strcpy(newname, ""); 5298 5299 /* remove newline character */ 5300 if (newname[strlen(newname) - 1] == '\n') 5301 newname[strlen(newname) - 1] = '\0'; 5302 5303 if (!(is_metaname(newname)) || 5304 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5305 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5306 "Invalid metadevice name\n")); 5307 (void) fflush(stderr); 5308 continue; 5309 } 5310 5311 if ((*new_np = metaname(&sp, newname, 5312 META_DEVICE, ep)) == NULL) { 5313 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5314 "Invalid metadevice name\n")); 5315 (void) fflush(stderr); 5316 continue; 5317 } 5318 5319 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5320 /* make sure the name isn't already being used */ 5321 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5322 metagetmiscname(*new_np, ep) != NULL) { 5323 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5324 "That name already exists\n")); 5325 continue; 5326 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5327 return (-1); 5328 5329 break; 5330 } 5331 5332 /* got a new name, place in used array and return */ 5333 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5334 mdclrerror(ep); 5335 return (1); 5336 } 5337 5338 /* 5339 * FUNCTION: meta_sp_validate_wm() 5340 * INPUT: sp - set name we are recovering in 5341 * compnp - name pointer for device we are recovering from 5342 * options - metarecover options 5343 * OUTPUT: ep - error pointer returned 5344 * RETURNS: int - 0 - success, -1 - error 5345 * PURPOSE: validate and display watermark configuration. walk the 5346 * on-disk watermark structures and validate the information 5347 * found within. since a watermark configuration is 5348 * "self-defining", the act of traversing the watermarks 5349 * is part of the validation process. 5350 */ 5351 static int 5352 meta_sp_validate_wm( 5353 mdsetname_t *sp, 5354 mdname_t *compnp, 5355 mdcmdopts_t options, 5356 md_error_t *ep 5357 ) 5358 { 5359 sp_ext_node_t *extlist = NULL; 5360 sp_ext_node_t *ext; 5361 int num_sps = 0; 5362 int rval; 5363 5364 if ((options & MDCMD_VERBOSE) != 0) 5365 (void) printf(dgettext(TEXT_DOMAIN, 5366 "Verifying on-disk structures on %s.\n"), 5367 compnp->cname); 5368 5369 /* 5370 * for each watermark, build an ext_node, place on list. 5371 */ 5372 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5373 meta_sp_cmp_by_nameseq, ep); 5374 5375 if ((options & MDCMD_VERBOSE) != 0) { 5376 /* print out what we found */ 5377 if (extlist == NULL) 5378 (void) printf(dgettext(TEXT_DOMAIN, 5379 "No extent headers found on %s.\n"), 5380 compnp->cname); 5381 else { 5382 (void) printf(dgettext(TEXT_DOMAIN, 5383 "The following extent headers were found on %s.\n"), 5384 compnp->cname); 5385 meta_sp_display_exthdr(); 5386 } 5387 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5388 meta_sp_display_ext(ext); 5389 } 5390 5391 if (rval < 0) { 5392 (void) printf(dgettext(TEXT_DOMAIN, 5393 "%s: On-disk structures invalid or " 5394 "no soft partitions found.\n"), 5395 compnp->cname); 5396 return (-1); 5397 } 5398 5399 assert(extlist != NULL); 5400 5401 /* count number of soft partitions */ 5402 for (ext = extlist; 5403 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5404 ext = ext->ext_next) { 5405 if (ext->ext_next != NULL && 5406 ext->ext_next->ext_namep != NULL && 5407 strcmp(ext->ext_next->ext_namep->cname, 5408 ext->ext_namep->cname) == 0) 5409 continue; 5410 num_sps++; 5411 } 5412 5413 if ((options & MDCMD_VERBOSE) != 0) 5414 (void) printf(dgettext(TEXT_DOMAIN, 5415 "Found %d soft partition(s) on %s.\n"), num_sps, 5416 compnp->cname); 5417 5418 if (num_sps == 0) { 5419 (void) printf(dgettext(TEXT_DOMAIN, 5420 "%s: No soft partitions.\n"), compnp->cname); 5421 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5422 } 5423 5424 /* check sequence numbers */ 5425 if ((options & MDCMD_VERBOSE) != 0) 5426 (void) printf(dgettext(TEXT_DOMAIN, 5427 "Checking sequence numbers.\n")); 5428 5429 if (meta_sp_checkseq(extlist) != 0) 5430 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5431 5432 return (0); 5433 } 5434 5435 /* 5436 * FUNCTION: meta_sp_validate_unit() 5437 * INPUT: sp - name of set we are recovering in 5438 * compnp - name of component we are recovering from 5439 * options - metarecover options 5440 * OUTPUT: ep - error pointer returned 5441 * RETURNS: int - 0 - success, -1 - error 5442 * PURPOSE: validate and display metadb configuration. begin by getting 5443 * all soft partitions built on the specified component. get 5444 * the unit structure for each one and validate the fields within. 5445 */ 5446 static int 5447 meta_sp_validate_unit( 5448 mdsetname_t *sp, 5449 mdname_t *compnp, 5450 mdcmdopts_t options, 5451 md_error_t *ep 5452 ) 5453 { 5454 md_sp_t *msp; 5455 mdnamelist_t *spnlp = NULL; 5456 mdnamelist_t *namep = NULL; 5457 int count; 5458 uint_t extn; 5459 sp_ext_length_t size; 5460 5461 if ((options & MDCMD_VERBOSE) != 0) 5462 (void) printf(dgettext(TEXT_DOMAIN, 5463 "%s: Validating soft partition metadb entries.\n"), 5464 compnp->cname); 5465 5466 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5467 return (-1); 5468 5469 /* get all soft partitions on component */ 5470 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5471 5472 if (count == 0) { 5473 (void) printf(dgettext(TEXT_DOMAIN, 5474 "%s: No soft partitions.\n"), compnp->cname); 5475 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5476 } else if (count < 0) { 5477 return (-1); 5478 } 5479 5480 /* Now go through the soft partitions and check each one */ 5481 for (namep = spnlp; namep != NULL; namep = namep->next) { 5482 mdname_t *curnp = namep->namep; 5483 sp_ext_offset_t curvoff; 5484 5485 /* get the unit structure */ 5486 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5487 return (-1); 5488 5489 /* verify generic unit structure parameters */ 5490 if ((options & MDCMD_VERBOSE) != 0) 5491 (void) printf(dgettext(TEXT_DOMAIN, 5492 "\nVerifying device %s.\n"), 5493 curnp->cname); 5494 5495 /* 5496 * MD_SP_LAST is an invalid state and is always the 5497 * highest numbered. 5498 */ 5499 if (msp->status >= MD_SP_LAST) { 5500 (void) printf(dgettext(TEXT_DOMAIN, 5501 "%s: status value %u is out of range.\n"), 5502 curnp->cname, msp->status); 5503 return (mdmderror(ep, MDE_RECOVER_FAILED, 5504 0, curnp->cname)); 5505 } else if ((options & MDCMD_VERBOSE) != 0) { 5506 uint_t tstate = 0; 5507 5508 if (metaismeta(msp->compnamep)) { 5509 if (meta_get_tstate(msp->common.namep->dev, 5510 &tstate, ep) != 0) 5511 return (-1); 5512 } 5513 (void) printf(dgettext(TEXT_DOMAIN, 5514 "%s: Status \"%s\" is valid.\n"), 5515 curnp->cname, meta_sp_status_to_name(msp->status, 5516 tstate & MD_DEV_ERRORED)); 5517 } 5518 5519 /* Now verify each extent */ 5520 if ((options & MDCMD_VERBOSE) != 0) 5521 (void) printf("%14s %21s %21s %21s\n", 5522 dgettext(TEXT_DOMAIN, "Extent Number"), 5523 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5524 dgettext(TEXT_DOMAIN, "Physical Offset"), 5525 dgettext(TEXT_DOMAIN, "Length")); 5526 5527 curvoff = 0ULL; 5528 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5529 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5530 5531 if ((options & MDCMD_VERBOSE) != 0) 5532 (void) printf("%14u %21llu %21llu %21llu\n", 5533 extn, extp->voff, extp->poff, extp->len); 5534 5535 if (extp->voff != curvoff) { 5536 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5537 "%s: virtual offset for extent %u " 5538 "is inconsistent, expected %llu, " 5539 "got %llu.\n"), curnp->cname, extn, 5540 curvoff, extp->voff); 5541 return (mdmderror(ep, MDE_RECOVER_FAILED, 5542 0, compnp->cname)); 5543 } 5544 5545 /* make sure extent does not drop off the end */ 5546 if ((extp->poff + extp->len) == size) { 5547 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5548 "%s: extent %u at offset %llu, " 5549 "length %llu exceeds the size of the " 5550 "device, %llu.\n"), curnp->cname, 5551 extn, extp->poff, extp->len, size); 5552 return (mdmderror(ep, MDE_RECOVER_FAILED, 5553 0, compnp->cname)); 5554 } 5555 5556 curvoff += extp->len; 5557 } 5558 } 5559 if (options & MDCMD_PRINT) { 5560 (void) printf(dgettext(TEXT_DOMAIN, 5561 "%s: Soft Partition metadb configuration is valid\n"), 5562 compnp->cname); 5563 } 5564 return (0); 5565 } 5566 5567 /* 5568 * FUNCTION: meta_sp_validate_wm_and_unit() 5569 * INPUT: sp - name of set we are recovering in 5570 * compnp - name of device we are recovering from 5571 * options - metarecover options 5572 * OUTPUT: ep - error pointer returned 5573 * RETURNS: int - 0 - success, -1 error 5574 * PURPOSE: cross-validate and display watermarks and metadb records. 5575 * get both the unit structures for the soft partitions built 5576 * on the specified component and the watermarks found on that 5577 * component and check to make sure they are consistent with 5578 * each other. 5579 */ 5580 static int 5581 meta_sp_validate_wm_and_unit( 5582 mdsetname_t *sp, 5583 mdname_t *np, 5584 mdcmdopts_t options, 5585 md_error_t *ep 5586 ) 5587 { 5588 sp_ext_node_t *wmlist = NULL; 5589 sp_ext_node_t *unitlist = NULL; 5590 sp_ext_node_t *unitext; 5591 sp_ext_node_t *wmext; 5592 sp_ext_offset_t tmpunitoff; 5593 mdnamelist_t *spnlp = NULL; 5594 int count; 5595 int rval = 0; 5596 int verbose = (options & MDCMD_VERBOSE); 5597 5598 /* get unit structure list */ 5599 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5600 if (count <= 0) 5601 return (-1); 5602 5603 meta_sp_list_insert(NULL, NULL, &unitlist, 5604 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5605 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5606 5607 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5608 metafreenamelist(spnlp); 5609 return (-1); 5610 } 5611 5612 metafreenamelist(spnlp); 5613 5614 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5615 5616 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5617 meta_sp_cmp_by_offset, ep) < 0) { 5618 meta_sp_list_free(&unitlist); 5619 return (-1); 5620 } 5621 5622 if (getenv(META_SP_DEBUG)) { 5623 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5624 meta_sp_list_dump(unitlist); 5625 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5626 meta_sp_list_dump(wmlist); 5627 } 5628 5629 /* 5630 * step through both lists and compare allocated nodes. Free 5631 * nodes and end watermarks may differ between the two but 5632 * that's generally ok, and if they're wrong will typically 5633 * cause misplaced allocated extents. 5634 */ 5635 if (verbose) 5636 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5637 "allocations match extent headers.\n"), np->cname); 5638 5639 unitext = unitlist; 5640 wmext = wmlist; 5641 while ((wmext != NULL) && (unitext != NULL)) { 5642 /* find next allocated extents in each list */ 5643 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5644 wmext = wmext->ext_next; 5645 5646 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5647 unitext = unitext->ext_next; 5648 5649 if (wmext == NULL || unitext == NULL) 5650 break; 5651 5652 if (verbose) { 5653 (void) printf(dgettext(TEXT_DOMAIN, 5654 "Metadb extent:\n")); 5655 meta_sp_display_exthdr(); 5656 meta_sp_display_ext(unitext); 5657 (void) printf(dgettext(TEXT_DOMAIN, 5658 "Extent header extent:\n")); 5659 meta_sp_display_exthdr(); 5660 meta_sp_display_ext(wmext); 5661 (void) printf("\n"); 5662 } 5663 5664 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5665 rval = -1; 5666 5667 /* 5668 * if the offsets aren't equal, only increment the 5669 * lowest one in hopes of getting the lists back in sync. 5670 */ 5671 tmpunitoff = unitext->ext_offset; 5672 if (unitext->ext_offset <= wmext->ext_offset) 5673 unitext = unitext->ext_next; 5674 if (wmext->ext_offset <= tmpunitoff) 5675 wmext = wmext->ext_next; 5676 } 5677 5678 /* 5679 * if both lists aren't at the end then there are extra 5680 * allocated nodes in one of them. 5681 */ 5682 if (wmext != NULL) { 5683 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5684 "%s: extent headers contain allocations not in " 5685 "the metadb\n\n"), np->cname); 5686 rval = -1; 5687 } 5688 5689 if (unitext != NULL) { 5690 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5691 "%s: metadb contains allocations not in the extent " 5692 "headers\n\n"), np->cname); 5693 rval = -1; 5694 } 5695 5696 if (options & MDCMD_PRINT) { 5697 if (rval == 0) { 5698 (void) printf(dgettext(TEXT_DOMAIN, 5699 "%s: Soft Partition metadb matches extent " 5700 "header configuration\n"), np->cname); 5701 } else { 5702 (void) printf(dgettext(TEXT_DOMAIN, 5703 "%s: Soft Partition metadb does not match extent " 5704 "header configuration\n"), np->cname); 5705 } 5706 } 5707 5708 return (rval); 5709 } 5710 5711 /* 5712 * FUNCTION: meta_sp_validate_exts() 5713 * INPUT: compnp - name pointer for device we are recovering from 5714 * wmext - extent node representing watermark 5715 * unitext - extent node from unit structure 5716 * OUTPUT: ep - return error pointer 5717 * RETURNS: int - 0 - succes, mdmderror return code - error 5718 * PURPOSE: Takes two extent nodes and checks them against each other. 5719 * offset, length, sequence number, set, and name are compared. 5720 */ 5721 static int 5722 meta_sp_validate_exts( 5723 mdname_t *compnp, 5724 sp_ext_node_t *wmext, 5725 sp_ext_node_t *unitext, 5726 md_error_t *ep 5727 ) 5728 { 5729 if (wmext->ext_offset != unitext->ext_offset) { 5730 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5731 "%s: unit structure and extent header offsets differ.\n"), 5732 compnp->cname); 5733 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5734 } 5735 5736 if (wmext->ext_length != unitext->ext_length) { 5737 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5738 "%s: unit structure and extent header lengths differ.\n"), 5739 compnp->cname); 5740 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5741 } 5742 5743 if (wmext->ext_seq != unitext->ext_seq) { 5744 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5745 "%s: unit structure and extent header sequence numbers " 5746 "differ.\n"), compnp->cname); 5747 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5748 } 5749 5750 if (wmext->ext_type != unitext->ext_type) { 5751 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5752 "%s: unit structure and extent header types differ.\n"), 5753 compnp->cname); 5754 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5755 } 5756 5757 /* 5758 * If one has a set pointer and the other doesn't, error. 5759 * If both extents have setnames, then make sure they match 5760 * If both are NULL, it's ok, they match. 5761 */ 5762 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5763 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5764 "%s: unit structure and extent header set values " 5765 "differ.\n"), compnp->cname); 5766 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5767 } 5768 5769 if (unitext->ext_setp != NULL) { 5770 if (strcmp(unitext->ext_setp->setname, 5771 wmext->ext_setp->setname) != 0) { 5772 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5773 "%s: unit structure and extent header set names " 5774 "differ.\n"), compnp->cname); 5775 return (mdmderror(ep, MDE_RECOVER_FAILED, 5776 0, compnp->cname)); 5777 } 5778 } 5779 5780 /* 5781 * If one has a name pointer and the other doesn't, error. 5782 * If both extents have names, then make sure they match 5783 * If both are NULL, it's ok, they match. 5784 */ 5785 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5786 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5787 "%s: unit structure and extent header name values " 5788 "differ.\n"), compnp->cname); 5789 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5790 } 5791 5792 if (unitext->ext_namep != NULL) { 5793 if (strcmp(wmext->ext_namep->cname, 5794 unitext->ext_namep->cname) != 0) { 5795 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5796 "%s: unit structure and extent header names " 5797 "differ.\n"), compnp->cname); 5798 return (mdmderror(ep, MDE_RECOVER_FAILED, 5799 0, compnp->cname)); 5800 } 5801 } 5802 5803 return (0); 5804 } 5805 5806 /* 5807 * FUNCTION: update_sp_status() 5808 * INPUT: sp - name of set we are recovering in 5809 * minors - pointer to an array of soft partition minor numbers 5810 * num_sps - number of minor numbers in array 5811 * status - new status to be applied to all soft parts in array 5812 * mn_set - set if current set is a multi-node set 5813 * OUTPUT: ep - return error pointer 5814 * RETURNS: int - 0 - success, -1 - error 5815 * PURPOSE: update status of soft partitions to new status. minors is an 5816 * array of minor numbers to apply the new status to. 5817 * If mn_set is set, a message is sent to all nodes in the 5818 * cluster to update the status locally. 5819 */ 5820 static int 5821 update_sp_status( 5822 mdsetname_t *sp, 5823 minor_t *minors, 5824 int num_sps, 5825 sp_status_t status, 5826 bool_t mn_set, 5827 md_error_t *ep 5828 ) 5829 { 5830 int i; 5831 int err = 0; 5832 5833 if (mn_set) { 5834 md_mn_msg_sp_setstat_t sp_setstat_params; 5835 int result; 5836 md_mn_result_t *resp = NULL; 5837 5838 for (i = 0; i < num_sps; i++) { 5839 sp_setstat_params.sp_setstat_mnum = minors[i]; 5840 sp_setstat_params.sp_setstat_status = status; 5841 5842 result = mdmn_send_message(sp->setno, 5843 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 5844 (char *)&sp_setstat_params, 5845 sizeof (sp_setstat_params), 5846 &resp, ep); 5847 if (resp != NULL) { 5848 if (resp->mmr_exitval != 0) 5849 err = -1; 5850 free_result(resp); 5851 } 5852 if (result != 0) { 5853 err = -1; 5854 } 5855 } 5856 } else { 5857 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5858 err = -1; 5859 } 5860 if (err < 0) { 5861 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5862 "Error updating status on recovered soft " 5863 "partitions.\n")); 5864 } 5865 return (err); 5866 } 5867 5868 /* 5869 * FUNCTION: meta_sp_recover_from_wm() 5870 * INPUT: sp - name of set we are recovering in 5871 * compnp - name pointer for component we are recovering from 5872 * options - metarecover options 5873 * OUTPUT: ep - return error pointer 5874 * RETURNS: int - 0 - success, -1 - error 5875 * PURPOSE: update metadb records to match watermarks. begin by getting 5876 * an extlist representing all soft partitions on the component. 5877 * then build a unit structure for each soft partition. 5878 * notify user of changes, then commit each soft partition to 5879 * the metadb one at a time in the "recovering" state. update 5880 * any watermarks that may need it (to reflect possible name 5881 * changes), and, finally, set the status of all recovered 5882 * partitions to the "OK" state at once. 5883 */ 5884 static int 5885 meta_sp_recover_from_wm( 5886 mdsetname_t *sp, 5887 mdname_t *compnp, 5888 mdcmdopts_t options, 5889 md_error_t *ep 5890 ) 5891 { 5892 sp_ext_node_t *extlist = NULL; 5893 sp_ext_node_t *sp_list = NULL; 5894 sp_ext_node_t *update_list = NULL; 5895 sp_ext_node_t *ext; 5896 sp_ext_node_t *sp_ext; 5897 mp_unit_t *mp; 5898 mp_unit_t **un_array; 5899 int numexts = 0, num_sps = 0, i = 0; 5900 int err = 0; 5901 int not_recovered = 0; 5902 int committed = 0; 5903 sp_ext_length_t sp_length = 0LL; 5904 mdnamelist_t *keynlp = NULL; 5905 mdname_t *np; 5906 mdname_t *new_np; 5907 int new_name; 5908 md_set_params_t set_params; 5909 minor_t *minors = NULL; 5910 char yesno[255]; 5911 char *yes; 5912 bool_t mn_set = 0; 5913 md_set_desc *sd; 5914 mm_unit_t *mm; 5915 md_set_mmown_params_t *ownpar = NULL; 5916 int comp_is_mirror = 0; 5917 5918 /* 5919 * if this component appears in another metadevice already, do 5920 * NOT recover from it. 5921 */ 5922 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5923 return (-1); 5924 5925 /* set flag if dealing with a MN set */ 5926 if (!metaislocalset(sp)) { 5927 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5928 return (-1); 5929 } 5930 if (MD_MNSET_DESC(sd)) 5931 mn_set = 1; 5932 } 5933 /* 5934 * for each watermark, build an ext_node, place on list. 5935 */ 5936 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5937 meta_sp_cmp_by_nameseq, ep) < 0) 5938 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5939 5940 assert(extlist != NULL); 5941 5942 /* count number of soft partitions */ 5943 for (ext = extlist; 5944 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5945 ext = ext->ext_next) { 5946 if (ext->ext_next != NULL && 5947 ext->ext_next->ext_namep != NULL && 5948 strcmp(ext->ext_next->ext_namep->cname, 5949 ext->ext_namep->cname) == 0) 5950 continue; 5951 num_sps++; 5952 } 5953 5954 /* allocate array of unit structure pointers */ 5955 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5956 5957 /* 5958 * build unit structures from list of ext_nodes. 5959 */ 5960 for (ext = extlist; 5961 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5962 ext = ext->ext_next) { 5963 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5964 &sp_list, ext->ext_offset, ext->ext_length, 5965 ext->ext_type, ext->ext_seq, ext->ext_flags, 5966 meta_sp_cmp_by_nameseq); 5967 5968 numexts++; 5969 sp_length += ext->ext_length - MD_SP_WMSIZE; 5970 5971 if (ext->ext_next != NULL && 5972 ext->ext_next->ext_namep != NULL && 5973 strcmp(ext->ext_next->ext_namep->cname, 5974 ext->ext_namep->cname) == 0) 5975 continue; 5976 5977 /* 5978 * if we made it here, we are at a soft partition 5979 * boundary in the list. 5980 */ 5981 if (getenv(META_SP_DEBUG)) { 5982 meta_sp_debug("meta_recover_from_wm: dumping wm " 5983 "list:\n"); 5984 meta_sp_list_dump(sp_list); 5985 } 5986 5987 assert(sp_list != NULL); 5988 assert(sp_list->ext_namep != NULL); 5989 5990 if ((new_name = meta_sp_resolve_name_conflict(sp, 5991 sp_list->ext_namep, &new_np, ep)) < 0) { 5992 err = 1; 5993 goto out; 5994 } else if (new_name) { 5995 for (sp_ext = sp_list; 5996 sp_ext != NULL; 5997 sp_ext = sp_ext->ext_next) { 5998 /* 5999 * insert into the update list for 6000 * watermark update. 6001 */ 6002 meta_sp_list_insert(sp_ext->ext_setp, 6003 new_np, &update_list, sp_ext->ext_offset, 6004 sp_ext->ext_length, sp_ext->ext_type, 6005 sp_ext->ext_seq, EXTFLG_UPDATE, 6006 meta_sp_cmp_by_offset); 6007 } 6008 6009 } 6010 if (options & MDCMD_DOIT) { 6011 /* store name in namespace */ 6012 if (mn_set) { 6013 /* send message to all nodes to return key */ 6014 md_mn_msg_addkeyname_t *send_params; 6015 int result; 6016 md_mn_result_t *resp = NULL; 6017 int message_size; 6018 6019 message_size = sizeof (*send_params) + 6020 strlen(compnp->cname) + 1; 6021 send_params = Zalloc(message_size); 6022 send_params->addkeyname_setno = sp->setno; 6023 (void) strcpy(&send_params->addkeyname_name[0], 6024 compnp->cname); 6025 result = mdmn_send_message(sp->setno, 6026 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6027 (char *)send_params, message_size, &resp, 6028 ep); 6029 Free(send_params); 6030 if (resp != NULL) { 6031 if (resp->mmr_exitval >= 0) { 6032 compnp->key = 6033 (mdkey_t)resp->mmr_exitval; 6034 } else { 6035 err = 1; 6036 free_result(resp); 6037 goto out; 6038 } 6039 free_result(resp); 6040 } 6041 if (result != 0) { 6042 err = 1; 6043 goto out; 6044 } 6045 (void) metanamelist_append(&keynlp, compnp); 6046 } else { 6047 if (add_key_name(sp, compnp, &keynlp, 6048 ep) != 0) { 6049 err = 1; 6050 goto out; 6051 } 6052 } 6053 } 6054 6055 /* create the unit structure */ 6056 if ((mp = meta_sp_createunit( 6057 (new_name) ? new_np : sp_list->ext_namep, compnp, 6058 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6059 err = 1; 6060 goto out; 6061 } 6062 6063 if (getenv(META_SP_DEBUG)) { 6064 meta_sp_debug("meta_sp_recover_from_wm: " 6065 "printing newly created unit structure"); 6066 meta_sp_printunit(mp); 6067 } 6068 6069 /* place in unit structure array */ 6070 un_array[i++] = mp; 6071 6072 /* free sp_list */ 6073 meta_sp_list_free(&sp_list); 6074 sp_list = NULL; 6075 numexts = 0; 6076 sp_length = 0LL; 6077 } 6078 6079 /* display configuration updates */ 6080 (void) printf(dgettext(TEXT_DOMAIN, 6081 "The following soft partitions were found and will be added to\n" 6082 "your metadevice configuration.\n")); 6083 (void) printf("%5s %15s %18s\n", 6084 dgettext(TEXT_DOMAIN, "Name"), 6085 dgettext(TEXT_DOMAIN, "Size"), 6086 dgettext(TEXT_DOMAIN, "No. of Extents")); 6087 for (i = 0; i < num_sps; i++) { 6088 (void) printf("%5s%lu %15llu %9d\n", "d", 6089 MD_MIN2UNIT(MD_SID(un_array[i])), 6090 un_array[i]->un_length, un_array[i]->un_numexts); 6091 } 6092 6093 if (!(options & MDCMD_DOIT)) { 6094 not_recovered = 1; 6095 goto out; 6096 } 6097 6098 /* ask user for confirmation */ 6099 (void) printf(dgettext(TEXT_DOMAIN, 6100 "WARNING: You are about to add one or more soft partition\n" 6101 "metadevices to your metadevice configuration. If there\n" 6102 "appears to be an error in the soft partition(s) displayed\n" 6103 "above, do NOT proceed with this recovery operation.\n")); 6104 (void) printf(dgettext(TEXT_DOMAIN, 6105 "Are you sure you want to do this (yes/no)? ")); 6106 6107 (void) fflush(stdout); 6108 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6109 (strlen(yesno) == 1)) 6110 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6111 dgettext(TEXT_DOMAIN, "no")); 6112 yes = dgettext(TEXT_DOMAIN, "yes"); 6113 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6114 not_recovered = 1; 6115 goto out; 6116 } 6117 6118 /* commit records one at a time */ 6119 for (i = 0; i < num_sps; i++) { 6120 (void) memset(&set_params, 0, sizeof (set_params)); 6121 set_params.mnum = MD_SID(un_array[i]); 6122 set_params.size = (un_array[i])->c.un_size; 6123 set_params.mdp = (uintptr_t)(un_array[i]); 6124 set_params.options = 6125 meta_check_devicesize(un_array[i]->un_length); 6126 if (set_params.options == MD_CRO_64BIT) { 6127 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6128 } else { 6129 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6130 } 6131 MD_SETDRIVERNAME(&set_params, MD_SP, 6132 MD_MIN2SET(set_params.mnum)); 6133 6134 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6135 6136 /* 6137 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6138 */ 6139 if (mn_set) { 6140 md_mn_msg_iocset_t send_params; 6141 int result; 6142 md_mn_result_t *resp = NULL; 6143 int mess_size; 6144 6145 /* 6146 * Calculate message size. md_mn_msg_iocset_t only 6147 * contains one extent, so increment the size to 6148 * include all extents 6149 */ 6150 mess_size = sizeof (send_params) - 6151 sizeof (mp_ext_t) + 6152 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6153 6154 send_params.iocset_params = set_params; 6155 (void) memcpy(&send_params.unit, un_array[i], 6156 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6157 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6158 result = mdmn_send_message(sp->setno, 6159 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 6160 (char *)&send_params, mess_size, &resp, 6161 ep); 6162 if (resp != NULL) { 6163 if (resp->mmr_exitval != 0) 6164 err = 1; 6165 free_result(resp); 6166 } 6167 if (result != 0) { 6168 err = 1; 6169 } 6170 } else { 6171 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6172 np->cname) != 0) { 6173 err = 1; 6174 } 6175 } 6176 6177 if (err == 1) { 6178 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6179 "%s: Error committing record to metadb.\n"), 6180 np->cname); 6181 goto out; 6182 } 6183 6184 /* note that we've committed a record */ 6185 if (!committed) 6186 committed = 1; 6187 6188 /* update any watermarks that need it */ 6189 if (update_list != NULL) { 6190 md_sp_t *msp; 6191 6192 /* 6193 * Check to see if we're trying to create a partition 6194 * on a mirror. If so we may have to enforce an 6195 * ownership change before writing the watermark out. 6196 */ 6197 if (metaismeta(compnp)) { 6198 char *miscname; 6199 6200 miscname = metagetmiscname(compnp, ep); 6201 if (miscname != NULL) 6202 comp_is_mirror = (strcmp(miscname, 6203 MD_MIRROR) == 0); 6204 else 6205 comp_is_mirror = 0; 6206 } 6207 /* 6208 * If this is a MN set and the component is a mirror, 6209 * change ownership to this node in order to write the 6210 * watermarks 6211 */ 6212 if (mn_set && comp_is_mirror) { 6213 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6214 if (mm == NULL) { 6215 err = 1; 6216 goto out; 6217 } else { 6218 err = meta_mn_change_owner(&ownpar, 6219 sp->setno, 6220 meta_getminor(compnp->dev), 6221 sd->sd_mn_mynode->nd_nodeid, 6222 MD_MN_MM_PREVENT_CHANGE | 6223 MD_MN_MM_SPAWN_THREAD); 6224 if (err != 0) 6225 goto out; 6226 } 6227 } 6228 6229 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6230 err = 1; 6231 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6232 "%s: Error updating extent headers.\n"), 6233 np->cname); 6234 goto out; 6235 } 6236 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6237 err = 1; 6238 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6239 "%s: Error updating extent headers " 6240 "on disk.\n"), np->cname); 6241 goto out; 6242 } 6243 } 6244 /* 6245 * If we have changed ownership earlier and prevented any 6246 * ownership changes, we can now allow ownership changes 6247 * again. 6248 */ 6249 if (ownpar) { 6250 (void) meta_mn_change_owner(&ownpar, sp->setno, 6251 ownpar->d.mnum, 6252 ownpar->d.owner, 6253 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6254 } 6255 } 6256 6257 /* update status of all soft partitions to OK */ 6258 minors = Zalloc(num_sps * sizeof (minor_t)); 6259 for (i = 0; i < num_sps; i++) 6260 minors[i] = MD_SID(un_array[i]); 6261 6262 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6263 if (err != 0) 6264 goto out; 6265 6266 if (options & MDCMD_PRINT) 6267 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6268 "Soft Partitions recovered from device.\n"), 6269 compnp->cname); 6270 out: 6271 /* free memory */ 6272 if (extlist != NULL) 6273 meta_sp_list_free(&extlist); 6274 if (sp_list != NULL) 6275 meta_sp_list_free(&sp_list); 6276 if (update_list != NULL) 6277 meta_sp_list_free(&update_list); 6278 if (un_array != NULL) { 6279 for (i = 0; i < num_sps; i++) 6280 Free(un_array[i]); 6281 Free(un_array); 6282 } 6283 if (minors != NULL) 6284 Free(minors); 6285 if (ownpar != NULL) 6286 Free(ownpar); 6287 (void) fflush(stdout); 6288 6289 if ((keynlp != NULL) && (committed != 1)) { 6290 /* 6291 * if we haven't committed any softparts, either because of an 6292 * error or because the user decided not to proceed, delete 6293 * namelist key for the component 6294 */ 6295 if (mn_set) { 6296 mdnamelist_t *p; 6297 6298 for (p = keynlp; (p != NULL); p = p->next) { 6299 mdname_t *np = p->namep; 6300 md_mn_msg_delkeyname_t send_params; 6301 md_mn_result_t *resp = NULL; 6302 6303 send_params.delkeyname_dev = np->dev; 6304 send_params.delkeyname_setno = sp->setno; 6305 send_params.delkeyname_key = np->key; 6306 (void) mdmn_send_message(sp->setno, 6307 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6308 (char *)&send_params, sizeof (send_params), 6309 &resp, ep); 6310 if (resp != NULL) { 6311 free_result(resp); 6312 } 6313 } 6314 } else { 6315 (void) del_key_names(sp, keynlp, NULL); 6316 } 6317 } 6318 6319 metafreenamelist(keynlp); 6320 6321 if (err) 6322 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6323 6324 if (not_recovered) 6325 if (options & MDCMD_PRINT) 6326 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6327 "Soft Partitions NOT recovered from device.\n"), 6328 compnp->cname); 6329 return (0); 6330 } 6331 6332 /* 6333 * FUNCTION: meta_sp_recover_from_unit() 6334 * INPUT: sp - name of set we are recovering in 6335 * compnp - name of component we are recovering from 6336 * options - metarecover options 6337 * OUTPUT: ep - return error pointer 6338 * RETURNS: int - 0 - success, -1 - error 6339 * PURPOSE: update watermarks to match metadb records. begin by getting 6340 * a namelist representing all soft partitions on the specified 6341 * component. then, build an extlist representing the soft 6342 * partitions, filling in the freespace extents. notify user 6343 * of changes, place all soft partitions into the "recovering" 6344 * state and update the watermarks. finally, return all soft 6345 * partitions to the "OK" state. 6346 */ 6347 static int 6348 meta_sp_recover_from_unit( 6349 mdsetname_t *sp, 6350 mdname_t *compnp, 6351 mdcmdopts_t options, 6352 md_error_t *ep 6353 ) 6354 { 6355 mdnamelist_t *spnlp = NULL; 6356 mdnamelist_t *nlp = NULL; 6357 sp_ext_node_t *ext = NULL; 6358 sp_ext_node_t *extlist = NULL; 6359 int count; 6360 char yesno[255]; 6361 char *yes; 6362 int rval = 0; 6363 minor_t *minors = NULL; 6364 int i; 6365 md_sp_t *msp; 6366 md_set_desc *sd; 6367 bool_t mn_set = 0; 6368 daddr_t start_block; 6369 6370 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6371 if (count <= 0) 6372 return (-1); 6373 6374 /* set flag if dealing with a MN set */ 6375 if (!metaislocalset(sp)) { 6376 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6377 return (-1); 6378 } 6379 if (MD_MNSET_DESC(sd)) 6380 mn_set = 1; 6381 } 6382 /* 6383 * Save the XDR unit structure for one of the soft partitions; 6384 * we'll use this later to provide metadevice context to 6385 * update the watermarks so the device can be resolved by 6386 * devid instead of dev_t. 6387 */ 6388 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6389 metafreenamelist(spnlp); 6390 return (-1); 6391 } 6392 6393 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6394 MD_DISKADDR_ERROR) { 6395 return (-1); 6396 } 6397 6398 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6399 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6400 meta_sp_list_insert(NULL, NULL, &extlist, 6401 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6402 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6403 6404 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6405 metafreenamelist(spnlp); 6406 return (-1); 6407 } 6408 6409 assert(extlist != NULL); 6410 if ((options & MDCMD_VERBOSE) != 0) { 6411 (void) printf(dgettext(TEXT_DOMAIN, 6412 "Updating extent headers on device %s from metadb.\n\n"), 6413 compnp->cname); 6414 (void) printf(dgettext(TEXT_DOMAIN, 6415 "The following extent headers will be written:\n")); 6416 meta_sp_display_exthdr(); 6417 } 6418 6419 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6420 6421 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6422 6423 /* mark every node for updating except the reserved space */ 6424 if (ext->ext_type != EXTTYP_RESERVED) { 6425 ext->ext_flags |= EXTFLG_UPDATE; 6426 6427 /* print extent information */ 6428 if ((options & MDCMD_VERBOSE) != 0) 6429 meta_sp_display_ext(ext); 6430 } 6431 } 6432 6433 /* request verification and then update all watermarks */ 6434 if ((options & MDCMD_DOIT) != 0) { 6435 6436 (void) printf(dgettext(TEXT_DOMAIN, 6437 "\nWARNING: You are about to overwrite portions of %s\n" 6438 "with soft partition metadata. The extent headers will be\n" 6439 "written to match the existing metadb configuration. If\n" 6440 "the device was not previously setup with this\n" 6441 "configuration, data loss may result.\n\n"), 6442 compnp->cname); 6443 (void) printf(dgettext(TEXT_DOMAIN, 6444 "Are you sure you want to do this (yes/no)? ")); 6445 6446 (void) fflush(stdout); 6447 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6448 (strlen(yesno) == 1)) 6449 (void) snprintf(yesno, sizeof (yesno), 6450 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6451 yes = dgettext(TEXT_DOMAIN, "yes"); 6452 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6453 /* place soft partitions into recovering state */ 6454 minors = Zalloc(count * sizeof (minor_t)); 6455 for (nlp = spnlp, i = 0; 6456 nlp != NULL && i < count; 6457 nlp = nlp->next, i++) { 6458 assert(nlp->namep != NULL); 6459 minors[i] = meta_getminor(nlp->namep->dev); 6460 } 6461 if (update_sp_status(sp, minors, count, 6462 MD_SP_RECOVER, mn_set, ep) != 0) { 6463 rval = -1; 6464 goto out; 6465 } 6466 6467 /* update the watermarks */ 6468 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6469 rval = -1; 6470 goto out; 6471 } 6472 6473 if (options & MDCMD_PRINT) { 6474 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6475 "Soft Partitions recovered from metadb\n"), 6476 compnp->cname); 6477 } 6478 6479 /* return soft partitions to the OK state */ 6480 if (update_sp_status(sp, minors, count, 6481 MD_SP_OK, mn_set, ep) != 0) { 6482 rval = -1; 6483 goto out; 6484 } 6485 6486 rval = 0; 6487 goto out; 6488 } 6489 } 6490 6491 if (options & MDCMD_PRINT) { 6492 (void) printf(dgettext(TEXT_DOMAIN, 6493 "%s: Soft Partitions NOT recovered from metadb\n"), 6494 compnp->cname); 6495 } 6496 6497 out: 6498 if (minors != NULL) 6499 Free(minors); 6500 metafreenamelist(spnlp); 6501 meta_sp_list_free(&extlist); 6502 (void) fflush(stdout); 6503 return (rval); 6504 } 6505 6506 6507 /* 6508 * FUNCTION: meta_sp_update_abr() 6509 * INPUT: sp - name of set we are recovering in 6510 * OUTPUT: ep - return error pointer 6511 * RETURNS: int - 0 - success, -1 - error 6512 * PURPOSE: update the ABR state for all soft partitions in the set. This 6513 * is called when joining a set. It sends a message to the master 6514 * node for each soft partition to get the value of tstate and 6515 * then sets ABR ,if required, by opening the sp, setting ABR 6516 * and then closing the sp. This approach is taken rather that 6517 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6518 * the case when we have another node simultaneously unsetting ABR. 6519 */ 6520 int 6521 meta_sp_update_abr( 6522 mdsetname_t *sp, 6523 md_error_t *ep 6524 ) 6525 { 6526 mdnamelist_t *devnlp = NULL; 6527 mdnamelist_t *p; 6528 mdname_t *devnp = NULL; 6529 md_unit_t *un; 6530 char fname[MAXPATHLEN]; 6531 int mnum, fd; 6532 volcap_t vc; 6533 uint_t tstate; 6534 6535 6536 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6537 return (-1); 6538 } 6539 6540 /* Exit if no soft partitions in this set */ 6541 if (devnlp == NULL) 6542 return (0); 6543 6544 /* For each soft partition */ 6545 for (p = devnlp; (p != NULL); p = p->next) { 6546 devnp = p->namep; 6547 6548 /* check if this is a top level metadevice */ 6549 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6550 goto out; 6551 if (MD_HAS_PARENT(MD_PARENT(un))) { 6552 Free(un); 6553 continue; 6554 } 6555 Free(un); 6556 6557 /* Get tstate from Master */ 6558 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6559 mdname_t *np; 6560 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6561 ep); 6562 if (np) { 6563 md_perror(dgettext(TEXT_DOMAIN, 6564 "Unable to get tstate for %s"), np->cname); 6565 } 6566 continue; 6567 } 6568 /* If not set on the master, nothing to do */ 6569 if (!(tstate & MD_ABR_CAP)) 6570 continue; 6571 6572 mnum = meta_getminor(devnp->dev); 6573 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6574 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6575 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6576 md_perror(dgettext(TEXT_DOMAIN, 6577 "Could not open device %s"), fname); 6578 continue; 6579 } 6580 6581 /* Set ABR state */ 6582 vc.vc_info = 0; 6583 vc.vc_set = 0; 6584 if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) { 6585 (void) close(fd); 6586 continue; 6587 } 6588 6589 vc.vc_set = DKV_ABR_CAP; 6590 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6591 (void) close(fd); 6592 goto out; 6593 } 6594 6595 (void) close(fd); 6596 } 6597 metafreenamelist(devnlp); 6598 return (0); 6599 out: 6600 metafreenamelist(devnlp); 6601 return (-1); 6602 } 6603 6604 /* 6605 * FUNCTION: meta_mn_sp_update_abr() 6606 * INPUT: arg - Given set. 6607 * PURPOSE: update the ABR state for all soft partitions in the set by 6608 * forking a process to call meta_sp_update_abr() 6609 * This function is only called via rpc.metad when adding a node 6610 * to a set, ie this node is beong joined to the set by another 6611 * node. 6612 */ 6613 void * 6614 meta_mn_sp_update_abr(void *arg) 6615 { 6616 set_t setno = *((set_t *)arg); 6617 mdsetname_t *sp; 6618 md_error_t mde = mdnullerror; 6619 int fval; 6620 6621 /* should have a set */ 6622 assert(setno != NULL); 6623 6624 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6625 mde_perror(&mde, ""); 6626 return (NULL); 6627 } 6628 6629 if (!(meta_is_mn_set(sp, &mde))) { 6630 mde_perror(&mde, ""); 6631 return (NULL); 6632 } 6633 6634 /* fork a process */ 6635 if ((fval = md_daemonize(sp, &mde)) != 0) { 6636 /* 6637 * md_daemonize will fork off a process. The is the 6638 * parent or error. 6639 */ 6640 if (fval > 0) { 6641 return (NULL); 6642 } 6643 mde_perror(&mde, ""); 6644 return (NULL); 6645 } 6646 /* 6647 * Child process should never return back to rpc.metad, but 6648 * should exit. 6649 * Flush all internally cached data inherited from parent process 6650 * since cached data will be cleared when parent process RPC request 6651 * has completed (which is possibly before this child process 6652 * can complete). 6653 * Child process can retrieve and cache its own copy of data from 6654 * rpc.metad that won't be changed by the parent process. 6655 * 6656 * Reset md_in_daemon since this child will be a client of rpc.metad 6657 * not part of the rpc.metad daemon itself. 6658 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6659 * this thread is rpc.metad or any other thread. (If this thread 6660 * was rpc.metad it could use some short circuit code to get data 6661 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6662 */ 6663 md_in_daemon = 0; 6664 metaflushsetname(sp); 6665 sr_cache_flush_setno(setno); 6666 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6667 mde_perror(&mde, ""); 6668 md_exit(sp, 1); 6669 } 6670 6671 6672 /* 6673 * Closing stdin/out/err here. 6674 */ 6675 (void) close(0); 6676 (void) close(1); 6677 (void) close(2); 6678 assert(fval == 0); 6679 6680 (void) meta_sp_update_abr(sp, &mde); 6681 6682 md_exit(sp, 0); 6683 /*NOTREACHED*/ 6684 return (NULL); 6685 } 6686 6687 int 6688 meta_sp_check_component( 6689 mdsetname_t *sp, 6690 mdname_t *np, 6691 md_error_t *ep 6692 ) 6693 { 6694 md_sp_t *msp; 6695 minor_t mnum = 0; 6696 md_dev64_t dev = 0; 6697 mdnm_params_t nm; 6698 md_getdevs_params_t mgd; 6699 side_t sideno; 6700 char *miscname; 6701 md_dev64_t *mydev = NULL; 6702 char *pname, *t; 6703 char *ctd_name; 6704 char *devname; 6705 int len; 6706 int rval = -1; 6707 6708 (void) memset(&nm, '\0', sizeof (nm)); 6709 if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL) 6710 return (-1); 6711 6712 if ((miscname = metagetmiscname(np, ep)) == NULL) 6713 return (-1); 6714 6715 sideno = getmyside(sp, ep); 6716 6717 meta_sp_debug("meta_sp_check_component: %s is on %s key: %d" 6718 " dev: %llu\n", 6719 np->cname, msp->compnamep->cname, msp->compnamep->key, 6720 msp->compnamep->dev); 6721 6722 /* 6723 * Now get the data from the unit structure. The compnamep stuff 6724 * contains the data from the namespace and we need the un_dev 6725 * from the unit structure. 6726 */ 6727 (void) memset(&mgd, '\0', sizeof (mgd)); 6728 MD_SETDRIVERNAME(&mgd, miscname, sp->setno); 6729 mgd.cnt = 1; /* sp's only have one subdevice */ 6730 mgd.mnum = meta_getminor(np->dev); 6731 6732 mydev = Zalloc(sizeof (*mydev)); 6733 mgd.devs = (uintptr_t)mydev; 6734 6735 if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) { 6736 meta_sp_debug("meta_sp_check_component: ioctl failed\n"); 6737 (void) mdstealerror(ep, &mgd.mde); 6738 rval = 0; 6739 goto out; 6740 } else if (mgd.cnt <= 0) { 6741 assert(mgd.cnt >= 0); 6742 rval = 0; 6743 goto out; 6744 } 6745 6746 /* Get the devname from the name space. */ 6747 if ((devname = meta_getnmentbykey(sp->setno, sideno, 6748 msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) { 6749 meta_sp_debug("meta_sp_check_component: key %d not" 6750 "found\n", msp->compnamep->key); 6751 goto out; 6752 } 6753 6754 meta_sp_debug("dev %s from component: (%lu, %lu)\n", 6755 devname, 6756 meta_getmajor(*mydev), 6757 meta_getminor(*mydev)); 6758 meta_sp_debug("minor from the namespace: %lu\n", mnum); 6759 6760 if (mnum != meta_getminor(*mydev)) { 6761 /* 6762 * The minor numbers are different. Update the namespace 6763 * with the information from the component. 6764 */ 6765 6766 t = strrchr(devname, '/'); 6767 t++; 6768 ctd_name = Strdup(t); 6769 6770 meta_sp_debug("meta_sp_check_component: ctd_name: %s\n", 6771 ctd_name); 6772 6773 len = strlen(devname); 6774 t = strrchr(devname, '/'); 6775 t++; 6776 pname = Zalloc((len - strlen(t)) + 1); 6777 (void) strncpy(pname, devname, (len - strlen(t))); 6778 meta_sp_debug("pathname: %s\n", pname); 6779 6780 meta_sp_debug("updating the minor number to %lu\n", nm.mnum); 6781 6782 if (meta_update_namespace(sp->setno, sideno, 6783 ctd_name, *mydev, msp->compnamep->key, pname, 6784 ep) != 0) { 6785 goto out; 6786 } 6787 } 6788 out: 6789 if (pname != NULL) 6790 Free(pname); 6791 if (ctd_name != NULL) 6792 Free(ctd_name); 6793 if (devname != NULL) 6794 Free(devname); 6795 if (mydev != NULL) 6796 Free(mydev); 6797 return (rval); 6798 } 6799