1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Just in case we're not in a build environment, make sure that 28 * TEXT_DOMAIN gets set to something. 29 */ 30 #if !defined(TEXT_DOMAIN) 31 #define TEXT_DOMAIN "SYS_TEST" 32 #endif 33 34 /* 35 * soft partition operations 36 * 37 * Soft Partitions provide a virtual disk mechanism which is used to 38 * divide a large volume into many small pieces, each appearing as a 39 * separate device. A soft partition consists of a series of extents, 40 * each having an offset and a length. The extents are logically 41 * contiguous, so where the first extent leaves off the second extent 42 * picks up. Which extent a given "virtual offset" belongs to is 43 * dependent on the size of all the previous extents in the soft 44 * partition. 45 * 46 * Soft partitions are represented in memory by an extent node 47 * (sp_ext_node_t) which contains all of the information necessary to 48 * create a unit structure and update the on-disk format, called 49 * "watermarks". 
These extent nodes are typically kept in a doubly 50 * linked list and are manipulated by list manipulation routines. A 51 * list of extents may represent all of the soft partitions on a volume, 52 * a single soft partition, or perhaps just a set of extents that need 53 * to be updated. Extent lists may be sorted by extent or by name/seq#, 54 * depending on which compare function is used. Most of the routines 55 * require the list be sorted by offset to work, and that's the typical 56 * configuration. 57 * 58 * In order to do an allocation, knowledge of all soft partitions on the 59 * volume is required. Then free space is determined from the space 60 * that is not allocated, and new allocations can be made from the free 61 * space. Once the new allocations are made, a unit structure is created 62 * and the watermarks are updated. The status is then changed to "okay" 63 * on the unit structure to commit the transaction. If updating the 64 * watermarks fails, the unit structure is in an intermediate state and 65 * the driver will not allow access to the device. 66 * 67 * A typical sequence of events is: 68 * 1. Fetch the list of names for all soft partitions on a volume 69 * meta_sp_get_by_component() 70 * 2. Construct an extent list from the name list 71 * meta_sp_extlist_from_namelist() 72 * 3. Fill the gaps in the extent list with free extents 73 * meta_sp_list_freefill() 74 * 4. Allocate from the free extents 75 * meta_sp_alloc_by_len() 76 * meta_sp_alloc_by_list() 77 * 5. Create the unit structure from the extent list 78 * meta_sp_createunit() 79 * meta_sp_updateunit() 80 * 6. Write out the watermarks 81 * meta_sp_update_wm() 82 * 7. 
Set the status to "Okay" 83 * meta_sp_setstatus() 84 * 85 */ 86 87 #include <stdio.h> 88 #include <meta.h> 89 #include "meta_repartition.h" 90 #include <sys/lvm/md_sp.h> 91 #include <sys/lvm/md_crc.h> 92 #include <strings.h> 93 #include <sys/lvm/md_mirror.h> 94 #include <sys/bitmap.h> 95 96 extern int md_in_daemon; 97 98 typedef struct sp_ext_node { 99 struct sp_ext_node *ext_next; /* next element */ 100 struct sp_ext_node *ext_prev; /* previous element */ 101 sp_ext_type_t ext_type; /* type of extent */ 102 sp_ext_offset_t ext_offset; /* starting offset */ 103 sp_ext_length_t ext_length; /* length of this node */ 104 uint_t ext_flags; /* extent flags */ 105 uint32_t ext_seq; /* watermark seq no */ 106 mdname_t *ext_namep; /* name pointer */ 107 mdsetname_t *ext_setp; /* set pointer */ 108 } sp_ext_node_t; 109 110 /* extent flags */ 111 #define EXTFLG_UPDATE (1) 112 113 /* Extent node compare function for list sorting */ 114 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 115 116 117 /* Function Prototypes */ 118 119 /* Debugging Functions */ 120 static void meta_sp_debug(char *format, ...); 121 static void meta_sp_printunit(mp_unit_t *mp); 122 123 /* Misc Support Functions */ 124 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 125 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 126 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 127 md_error_t *ep); 128 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 129 mdnamelist_t **nlpp, int force, md_error_t *ep); 130 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 131 mdname_t *compnp, md_error_t *ep); 132 133 /* Extent List Manipulation Functions */ 134 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 135 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 136 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 137 sp_ext_node_t **head, sp_ext_offset_t offset, 
sp_ext_length_t length, 138 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 139 static void meta_sp_list_free(sp_ext_node_t **head); 140 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 141 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 142 sp_ext_type_t exttype, int exclude_wm); 143 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 144 sp_ext_offset_t offset); 145 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 146 sp_ext_length_t size); 147 static void meta_sp_list_dump(sp_ext_node_t *head); 148 static int meta_sp_list_overlaps(sp_ext_node_t *head); 149 150 /* Extent List Query Functions */ 151 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 152 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 153 sp_ext_length_t alignment); 154 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 155 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 156 md_error_t *ep); 157 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 158 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 159 160 161 /* Extent Allocation Functions */ 162 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 163 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 164 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 165 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 166 sp_ext_node_t **extlist, sp_ext_length_t *lp, 167 sp_ext_offset_t last_off, sp_ext_length_t alignment); 168 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 169 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 170 171 /* Extent List Population Functions */ 172 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 173 sp_ext_node_t **extlist, md_error_t *ep); 174 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 175 sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t 
*ep); 176 177 /* Print (metastat) Functions */ 178 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 179 mdprtopts_t options, md_error_t *ep); 180 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 181 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 182 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 183 184 /* Watermark Manipulation Functions */ 185 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 186 sp_ext_node_t *extlist, md_error_t *ep); 187 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 188 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 189 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 190 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 191 md_error_t *ep); 192 193 /* Unit Structure Manipulation Functions */ 194 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 195 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 196 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 197 sp_status_t status, md_error_t *ep); 198 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 199 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 200 md_error_t *ep); 201 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 202 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 203 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 204 int *repart_options, md_error_t *ep); 205 206 /* Reset (metaclear) Functions */ 207 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 208 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 209 210 /* Recovery (metarecover) Functions */ 211 static void meta_sp_display_exthdr(void); 212 static void meta_sp_display_ext(sp_ext_node_t *ext); 213 static int meta_sp_checkseq(sp_ext_node_t *extlist); 214 
static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 215 mdname_t **, md_error_t *); 216 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 217 mdcmdopts_t options, md_error_t *ep); 218 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 219 mdcmdopts_t options, md_error_t *ep); 220 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 221 mdcmdopts_t options, md_error_t *ep); 222 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 223 sp_ext_node_t *unitext, md_error_t *ep); 224 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 225 mdcmdopts_t options, md_error_t *ep); 226 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 227 mdcmdopts_t options, md_error_t *ep); 228 229 /* 230 * Private Constants 231 */ 232 233 static const int FORCE_RELOAD_CACHE = 1; 234 static const uint_t NO_FLAGS = 0; 235 static const sp_ext_offset_t NO_OFFSET = 0ULL; 236 static const uint_t NO_SEQUENCE_NUMBER = 0; 237 static const int ONE_SOFT_PARTITION = 1; 238 239 static unsigned long *sp_parent_printed[MD_MAXSETS]; 240 241 #define TEST_SOFT_PARTITION_NAMEP NULL 242 #define TEST_SETNAMEP NULL 243 244 #define EXCLUDE_WM (1) 245 #define INCLUDE_WM (0) 246 247 #define SP_UNALIGNED (0LL) 248 249 /* 250 * ************************************************************************** 251 * Debugging Functions * 252 * ************************************************************************** 253 */ 254 255 /*PRINTFLIKE1*/ 256 static void 257 meta_sp_debug(char *format, ...) 258 { 259 static int debug; 260 static int debug_set = 0; 261 va_list ap; 262 263 if (!debug_set) { 264 debug = getenv(META_SP_DEBUG) ? 
1 : 0; 265 debug_set = 1; 266 } 267 268 if (debug) { 269 va_start(ap, format); 270 (void) vfprintf(stderr, format, ap); 271 va_end(ap); 272 } 273 } 274 275 static void 276 meta_sp_printunit(mp_unit_t *mp) 277 { 278 int i; 279 280 if (mp == NULL) 281 return; 282 283 /* print the common fields we know about */ 284 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 285 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 286 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 287 288 /* sp-specific fields */ 289 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 290 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 291 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 292 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 293 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 294 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 295 296 /* print extent information */ 297 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 298 for (i = 0; i < mp->un_numexts; i++) { 299 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 300 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 301 mp->un_ext[i].un_len); 302 } 303 } 304 305 /* 306 * FUNCTION: meta_sp_parsesize() 307 * INPUT: s - the string to parse 308 * OUTPUT: *szp - disk block count (0 for "all") 309 * RETURNS: -1 for error, 0 for success 310 * PURPOSE: parses the command line parameter that specifies the 311 * requested size of a soft partition. The input string 312 * is either the literal "all" or a numeric value 313 * followed by a single character, b for disk blocks, k 314 * for kilobytes, m for megabytes, g for gigabytes, or t 315 * for terabytes. p for petabytes and e for exabytes 316 * have been added as undocumented features for future 317 * expansion. For example, 100m is 100 megabytes, while 318 * 50g is 50 gigabytes. All values are rounded up to the 319 * nearest block size. 
320 */ 321 int 322 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 323 { 324 if (s == NULL || szp == NULL) { 325 return (-1); 326 } 327 328 /* Check for literal "all" */ 329 if (strcasecmp(s, "all") == 0) { 330 *szp = 0; 331 return (0); 332 } 333 334 return (meta_sp_parsesizestring(s, szp)); 335 } 336 337 /* 338 * FUNCTION: meta_sp_parsesizestring() 339 * INPUT: s - the string to parse 340 * OUTPUT: *szp - disk block count 341 * RETURNS: -1 for error, 0 for success 342 * PURPOSE: parses a string that specifies size. The input string is a 343 * numeric value followed by a single character, b for disk blocks, 344 * k for kilobytes, m for megabytes, g for gigabytes, or t for 345 * terabytes. p for petabytes and e for exabytes have been added 346 * as undocumented features for future expansion. For example, 347 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 348 * are rounded up to the nearest block size. 349 */ 350 static int 351 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 352 { 353 sp_ext_length_t len = 0; 354 char len_type[2]; 355 356 if (s == NULL || szp == NULL) { 357 return (-1); 358 } 359 360 /* 361 * make sure block offset does not overflow 2^64 bytes. 
362 */ 363 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 364 (len == 0LL) || 365 (len > (1LL << (64 - DEV_BSHIFT)))) 366 return (-1); 367 368 switch (len_type[0]) { 369 case 'B': 370 case 'b': 371 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 372 break; 373 case 'K': 374 case 'k': 375 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 376 break; 377 case 'M': 378 case 'm': 379 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 380 break; 381 case 'g': 382 case 'G': 383 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 384 break; 385 case 't': 386 case 'T': 387 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 388 DEV_BSIZE)); 389 break; 390 case 'p': 391 case 'P': 392 len = lbtodb(roundup( 393 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 394 DEV_BSIZE)); 395 break; 396 case 'e': 397 case 'E': 398 len = lbtodb(roundup( 399 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 400 DEV_BSIZE)); 401 break; 402 default: 403 /* error */ 404 return (-1); 405 } 406 407 *szp = len; 408 return (0); 409 } 410 411 /* 412 * FUNCTION: meta_sp_setgeom() 413 * INPUT: np - the underlying device to setup geometry for 414 * compnp - the underlying device to setup geometry for 415 * mp - the unit structure to set the geometry for 416 * OUTPUT: ep - return error pointer 417 * RETURNS: int - -1 if error, 0 otherwise 418 * PURPOSE: establishes geometry information for a device 419 */ 420 static int 421 meta_sp_setgeom( 422 mdname_t *np, 423 mdname_t *compnp, 424 mp_unit_t *mp, 425 md_error_t *ep 426 ) 427 { 428 mdgeom_t *geomp; 429 uint_t round_cyl = 0; 430 431 if ((geomp = metagetgeom(compnp, ep)) == NULL) 432 return (-1); 433 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 434 geomp->read_reinstruct, round_cyl, ep) != 0) 435 return (-1); 436 437 return (0); 438 } 439 440 /* 441 * FUNCTION: meta_sp_setstatus() 442 * INPUT: sp - the set name for the devices to set the status on 443 * minors - an array of minor 
numbers of devices to set status on 444 * num_units - number of entries in the array 445 * status - status value to set all units to 446 * OUTPUT: ep - return error pointer 447 * RETURNS: int - -1 if error, 0 success 448 * PURPOSE: sets the status of one or more soft partitions to the 449 * requested value 450 */ 451 int 452 meta_sp_setstatus( 453 mdsetname_t *sp, 454 minor_t *minors, 455 int num_units, 456 sp_status_t status, 457 md_error_t *ep 458 ) 459 { 460 md_sp_statusset_t status_params; 461 462 assert(minors != NULL); 463 464 /* update status of all soft partitions to the status passed in */ 465 (void) memset(&status_params, 0, sizeof (status_params)); 466 status_params.num_units = num_units; 467 status_params.new_status = status; 468 status_params.size = num_units * sizeof (minor_t); 469 status_params.minors = (uintptr_t)minors; 470 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 471 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 472 NULL) != 0) { 473 (void) mdstealerror(ep, &status_params.mde); 474 return (-1); 475 } 476 return (0); 477 } 478 479 /* 480 * FUNCTION: meta_get_sp_names() 481 * INPUT: sp - the set name to get soft partitions from 482 * options - options from the command line 483 * OUTPUT: nlpp - list of all soft partition names 484 * ep - return error pointer 485 * RETURNS: int - -1 if error, 0 success 486 * PURPOSE: returns a list of all soft partitions in the metadb 487 * for all devices in the specified set 488 */ 489 int 490 meta_get_sp_names( 491 mdsetname_t *sp, 492 mdnamelist_t **nlpp, 493 int options, 494 md_error_t *ep 495 ) 496 { 497 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 498 } 499 500 /* 501 * FUNCTION: meta_get_by_component() 502 * INPUT: sp - the set name to get soft partitions from 503 * compnp - the name of the device containing the soft 504 * partitions that will be returned 505 * force - 0 - reads cached namelist if available, 506 * 1 - reloads cached namelist, frees old namelist 507 * 
OUTPUT: nlpp - list of all soft partition names 508 * ep - return error pointer 509 * RETURNS: int - -1 error, otherwise the number of soft partitions 510 * found on the component (0 = none found). 511 * PURPOSE: returns a list of all soft partitions on a given device 512 * from the metadb information 513 */ 514 static int 515 meta_sp_get_by_component( 516 mdsetname_t *sp, 517 mdname_t *compnp, 518 mdnamelist_t **nlpp, 519 int force, 520 md_error_t *ep 521 ) 522 { 523 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 524 static int cached_count = 0; /* cached count */ 525 mdnamelist_t *spnlp = NULL; /* all sp names */ 526 mdnamelist_t *namep; /* list iterator */ 527 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 528 mdnamelist_t **cachetailpp; /* cache tail */ 529 md_sp_t *msp; /* unit structure */ 530 int count = 0; /* count of sp's */ 531 int err; 532 mdname_t *curnp; 533 534 if ((cached_list != NULL) && (!force)) { 535 /* return a copy of the cached list */ 536 for (namep = cached_list; namep != NULL; namep = namep->next) 537 tailpp = meta_namelist_append_wrapper(tailpp, 538 namep->namep); 539 return (cached_count); 540 } 541 542 /* free the cache and reset values to zeros to prepare for a new list */ 543 metafreenamelist(cached_list); 544 cached_count = 0; 545 cached_list = NULL; 546 cachetailpp = &cached_list; 547 *nlpp = NULL; 548 549 /* get all the softpartitions first of all */ 550 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 551 return (-1); 552 553 /* 554 * Now for each sp, see if it resides on the component we 555 * are interested in, if so then add it to our list 556 */ 557 for (namep = spnlp; namep != NULL; namep = namep->next) { 558 curnp = namep->namep; 559 560 /* get the unit structure */ 561 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 562 continue; 563 564 /* 565 * If the current soft partition is not on the same 566 * component, continue the search. If it is on the same 567 * component, add it to our namelist. 
568 */ 569 err = meta_check_samedrive(compnp, msp->compnamep, ep); 570 if (err <= 0) { 571 /* not on the same device, check the next one */ 572 continue; 573 } 574 575 /* it's on the same drive */ 576 577 /* 578 * Check for overlapping partitions if the component is not 579 * a metadevice. 580 */ 581 if (!metaismeta(msp->compnamep)) { 582 /* 583 * if they're on the same drive, neither 584 * should be a metadevice if one isn't 585 */ 586 assert(!metaismeta(compnp)); 587 588 if (meta_check_overlap(msp->compnamep->cname, 589 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 590 continue; 591 592 /* in this case it's not an error for them to overlap */ 593 mdclrerror(ep); 594 } 595 596 /* Component is on the same device, add to the used list */ 597 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 598 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 599 curnp); 600 601 ++count; 602 ++cached_count; 603 } 604 605 assert(count == cached_count); 606 return (count); 607 608 out: 609 metafreenamelist(*nlpp); 610 *nlpp = NULL; 611 return (-1); 612 } 613 614 /* 615 * FUNCTION: meta_sp_get_default_alignment() 616 * INPUT: sp - the pertinent set name 617 * compnp - the name of the underlying component 618 * OUTPUT: ep - return error pointer 619 * RETURNS: sp_ext_length_t =0: no default alignment 620 * >0: default alignment 621 * PURPOSE: returns the default alignment for soft partitions to 622 * be built on top of the specified component or 623 * metadevice 624 */ 625 static sp_ext_length_t 626 meta_sp_get_default_alignment( 627 mdsetname_t *sp, 628 mdname_t *compnp, 629 md_error_t *ep 630 ) 631 { 632 sp_ext_length_t a = SP_UNALIGNED; 633 char *mname; 634 635 assert(compnp != NULL); 636 637 /* 638 * We treat raw devices as opaque, and assume nothing about 639 * their alignment requirements. 
640 */ 641 if (!metaismeta(compnp)) 642 return (SP_UNALIGNED); 643 644 /* 645 * We already know it's a metadevice from the previous test; 646 * metagetmiscname() will tell us which metadevice type we 647 * have 648 */ 649 mname = metagetmiscname(compnp, ep); 650 if (mname == NULL) 651 goto out; 652 653 /* 654 * For a mirror, we want to deal with the stripe that is the 655 * primary side. If it happens to be asymmetrically 656 * configured, there is no simple way to fake a universal 657 * alignment. There's a chance that the least common 658 * denominator of the set of interlaces from all stripes of 659 * all submirrors would do it, but nobody that really cared 660 * that much about this issue would create an asymmetric 661 * config to start with. 662 * 663 * If the component underlying the soft partition is a mirror, 664 * then at the exit of this loop, compnp will have been 665 * updated to describe the first active submirror. 666 */ 667 if (strcmp(mname, MD_MIRROR) == 0) { 668 md_mirror_t *mp; 669 int smi; 670 md_submirror_t *smp; 671 672 mp = meta_get_mirror(sp, compnp, ep); 673 if (mp == NULL) 674 goto out; 675 676 for (smi = 0; smi < NMIRROR; smi++) { 677 678 smp = &mp->submirrors[smi]; 679 if (smp->state == SMS_UNUSED) 680 continue; 681 682 compnp = smp->submirnamep; 683 assert(compnp != NULL); 684 685 mname = metagetmiscname(compnp, ep); 686 if (mname == NULL) 687 goto out; 688 689 break; 690 } 691 692 if (smi == NMIRROR) 693 goto out; 694 } 695 696 /* 697 * Handle stripes and submirrors identically; just return the 698 * interlace of the first row. 699 */ 700 if (strcmp(mname, MD_STRIPE) == 0) { 701 md_stripe_t *stp; 702 703 stp = meta_get_stripe(sp, compnp, ep); 704 if (stp == NULL) 705 goto out; 706 707 a = stp->rows.rows_val[0].interlace; 708 goto out; 709 } 710 711 /* 712 * Raid is even more straightforward; the interlace applies to 713 * the entire device. 
714 */ 715 if (strcmp(mname, MD_RAID) == 0) { 716 md_raid_t *rp; 717 718 rp = meta_get_raid(sp, compnp, ep); 719 if (rp == NULL) 720 goto out; 721 722 a = rp->interlace; 723 goto out; 724 } 725 726 /* 727 * If we have arrived here with the alignment still not set, 728 * then we expect the error to have been set by one of the 729 * routines we called. If neither is the case, something has 730 * really gone wrong above. (Probably the submirror walk 731 * failed to produce a valid submirror, but that would be 732 * really bad...) 733 */ 734 out: 735 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 736 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 737 738 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 739 mde_perror(ep, NULL); 740 } 741 742 assert((a > 0) || (!mdisok(ep))); 743 744 return (a); 745 } 746 747 748 749 /* 750 * FUNCTION: meta_check_insp() 751 * INPUT: sp - the set name for the device to check 752 * np - the name of the device to check 753 * slblk - the starting offset of the device to check 754 * nblks - the number of blocks in the device to check 755 * OUTPUT: ep - return error pointer 756 * RETURNS: int - 0 - device contains soft partitions 757 * -1 - device does not contain soft partitions 758 * PURPOSE: determines whether a device contains any soft partitions 759 */ 760 /* ARGSUSED */ 761 int 762 meta_check_insp( 763 mdsetname_t *sp, 764 mdname_t *np, 765 diskaddr_t slblk, 766 diskaddr_t nblks, 767 md_error_t *ep 768 ) 769 { 770 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 771 int count; 772 int rval; 773 774 /* check set pointer */ 775 assert(sp != NULL); 776 777 /* 778 * Get a list of the soft partitions that currently reside on 779 * the component. We should ALWAYS force reload the cache, 780 * because if we're using the md.tab, we must rebuild 781 * the list because it won't contain the previous (if any) 782 * soft partition. 
783 */ 784 /* find all soft partitions on the component */ 785 count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep); 786 787 if (count == -1) { 788 rval = -1; 789 } else if (count > 0) { 790 rval = mduseerror(ep, MDE_ALREADY, np->dev, 791 spnlp->namep->cname, np->cname); 792 } else { 793 rval = 0; 794 } 795 796 metafreenamelist(spnlp); 797 return (rval); 798 } 799 800 /* 801 * ************************************************************************** 802 * Extent List Manipulation Functions * 803 * ************************************************************************** 804 */ 805 806 /* 807 * FUNCTION: meta_sp_cmp_by_nameseq() 808 * INPUT: e1 - first node to compare 809 * e2 - second node to compare 810 * OUTPUT: none 811 * RETURNS: int - =0 - nodes are equal 812 * <0 - e1 should go before e2 813 * >0 - e1 should go after e2 814 * PURPOSE: used for sorted list inserts to build a list sorted by 815 * name first and sequence number second. 816 */ 817 static int 818 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 819 { 820 int rval; 821 822 if (e1->ext_namep == NULL) 823 return (1); 824 if (e2->ext_namep == NULL) 825 return (-1); 826 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 827 return (rval); 828 829 /* the names are equal, compare sequence numbers */ 830 if (e1->ext_seq > e2->ext_seq) 831 return (1); 832 if (e1->ext_seq < e2->ext_seq) 833 return (-1); 834 /* sequence numbers are also equal */ 835 return (0); 836 } 837 838 /* 839 * FUNCTION: meta_sp_cmp_by_offset() 840 * INPUT: e1 - first node to compare 841 * e2 - second node to compare 842 * OUTPUT: none 843 * RETURNS: int - =0 - nodes are equal 844 * <0 - e1 should go before e2 845 * >0 - e1 should go after e2 846 * PURPOSE: used for sorted list inserts to build a list sorted by offset 847 */ 848 static int 849 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 850 { 851 if (e1->ext_offset > e2->ext_offset) 852 return (1); 853 if (e1->ext_offset < 
e2->ext_offset) 854 return (-1); 855 /* offsets are equal */ 856 return (0); 857 } 858 859 /* 860 * FUNCTION: meta_sp_list_insert() 861 * INPUT: sp - the set name for the device the node belongs to 862 * np - the name of the device the node belongs to 863 * head - the head of the list, must be NULL for empty list 864 * offset - the physical offset of this extent in sectors 865 * length - the length of this extent in sectors 866 * type - the type of the extent being inserted 867 * seq - the sequence number of the extent being inserted 868 * flags - extent flags (eg. whether it needs to be updated) 869 * compare - the compare function to use 870 * OUTPUT: head - points to the new head if a node was inserted 871 * at the beginning 872 * RETURNS: void 873 * PURPOSE: inserts an extent node into a sorted doubly linked list. 874 * The sort order is determined by the compare function. 875 * Memory is allocated for the node in this function and it 876 * is up to the caller to free it, possibly using 877 * meta_sp_list_free(). If a node is inserted at the 878 * beginning of the list, the head pointer is updated to 879 * point to the new first node. 
880 */ 881 static void 882 meta_sp_list_insert( 883 mdsetname_t *sp, 884 mdname_t *np, 885 sp_ext_node_t **head, 886 sp_ext_offset_t offset, 887 sp_ext_length_t length, 888 sp_ext_type_t type, 889 uint_t seq, 890 uint_t flags, 891 ext_cmpfunc_t compare 892 ) 893 { 894 sp_ext_node_t *newext; 895 sp_ext_node_t *curext; 896 897 assert(head != NULL); 898 899 /* Don't bother adding zero length nodes */ 900 if (length == 0ULL) 901 return; 902 903 /* allocate and fill in new ext_node */ 904 newext = Zalloc(sizeof (sp_ext_node_t)); 905 906 newext->ext_offset = offset; 907 newext->ext_length = length; 908 newext->ext_flags = flags; 909 newext->ext_type = type; 910 newext->ext_seq = seq; 911 newext->ext_setp = sp; 912 newext->ext_namep = np; 913 914 /* first node in the list */ 915 if (*head == NULL) { 916 newext->ext_next = newext->ext_prev = NULL; 917 *head = newext; 918 } else if ((*compare)(*head, newext) >= 0) { 919 /* the first node has a bigger offset, so insert before it */ 920 assert((*head)->ext_prev == NULL); 921 922 newext->ext_prev = NULL; 923 newext->ext_next = *head; 924 (*head)->ext_prev = newext; 925 *head = newext; 926 } else { 927 /* 928 * find the next node whose offset is greater than 929 * the one we want to insert, or the end of the list. 
930 */ 931 for (curext = *head; 932 (curext->ext_next != NULL) && 933 ((*compare)(curext->ext_next, newext) < 0); 934 (curext = curext->ext_next)) 935 ; 936 937 /* link the new node in after the current node */ 938 newext->ext_next = curext->ext_next; 939 newext->ext_prev = curext; 940 941 if (curext->ext_next != NULL) 942 curext->ext_next->ext_prev = newext; 943 944 curext->ext_next = newext; 945 } 946 } 947 948 /* 949 * FUNCTION: meta_sp_list_free() 950 * INPUT: head - the head of the list, must be NULL for empty list 951 * OUTPUT: head - points to NULL on return 952 * RETURNS: void 953 * PURPOSE: walks a double linked extent list and frees each node 954 */ 955 static void 956 meta_sp_list_free(sp_ext_node_t **head) 957 { 958 sp_ext_node_t *ext; 959 sp_ext_node_t *next; 960 961 assert(head != NULL); 962 963 ext = *head; 964 while (ext) { 965 next = ext->ext_next; 966 Free(ext); 967 ext = next; 968 } 969 *head = NULL; 970 } 971 972 /* 973 * FUNCTION: meta_sp_list_remove() 974 * INPUT: head - the head of the list, must be NULL for empty list 975 * ext - the extent to remove, must be a member of the list 976 * OUTPUT: head - points to the new head of the list 977 * RETURNS: void 978 * PURPOSE: unlinks the node specified by ext from the list and 979 * frees it, possibly moving the head pointer forward if 980 * the head is the node being removed. 
981 */ 982 static void 983 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 984 { 985 assert(head != NULL); 986 assert(*head != NULL); 987 988 if (*head == ext) 989 *head = ext->ext_next; 990 991 if (ext->ext_prev != NULL) 992 ext->ext_prev->ext_next = ext->ext_next; 993 if (ext->ext_next != NULL) 994 ext->ext_next->ext_prev = ext->ext_prev; 995 Free(ext); 996 } 997 998 /* 999 * FUNCTION: meta_sp_list_size() 1000 * INPUT: head - the head of the list, must be NULL for empty list 1001 * exttype - the type of the extents to sum 1002 * exclude_wm - subtract space for extent headers from total 1003 * OUTPUT: none 1004 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1005 * PURPOSE: sums the lengths of all extents in the list matching the 1006 * specified type. This could be used for computing the 1007 * amount of free or used space, for example. 1008 */ 1009 static sp_ext_length_t 1010 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1011 { 1012 sp_ext_node_t *ext; 1013 sp_ext_length_t size = 0LL; 1014 1015 for (ext = head; ext != NULL; ext = ext->ext_next) 1016 if (ext->ext_type == exttype) 1017 size += ext->ext_length - 1018 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1019 1020 return (size); 1021 } 1022 1023 /* 1024 * FUNCTION: meta_sp_list_find() 1025 * INPUT: head - the head of the list, must be NULL for empty list 1026 * offset - the offset contained by the node to find 1027 * OUTPUT: none 1028 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1029 * or NULL if no such nodes were found. 1030 * PURPOSE: finds a node in a list containing the requested offset 1031 * (inclusive). If multiple nodes contain this offset then 1032 * only the first will be returned, though typically these 1033 * lists are managed with non-overlapping nodes. 
1034 * 1035 * *The list MUST be sorted by offset for this function to work.* 1036 */ 1037 static sp_ext_node_t * 1038 meta_sp_list_find( 1039 sp_ext_node_t *head, 1040 sp_ext_offset_t offset 1041 ) 1042 { 1043 sp_ext_node_t *ext; 1044 1045 for (ext = head; ext != NULL; ext = ext->ext_next) { 1046 /* check if the offset lies within this extent */ 1047 if ((offset >= ext->ext_offset) && 1048 (offset < ext->ext_offset + ext->ext_length)) { 1049 /* 1050 * the requested extent should always be a 1051 * subset of an extent in the list. 1052 */ 1053 return (ext); 1054 } 1055 } 1056 return (NULL); 1057 } 1058 1059 /* 1060 * FUNCTION: meta_sp_list_freefill() 1061 * INPUT: head - the head of the list, must be NULL for empty list 1062 * size - the size of the volume this extent list is 1063 * representing 1064 * OUTPUT: head - the new head of the list 1065 * RETURNS: void 1066 * PURPOSE: finds gaps in the extent list and fills them with a free 1067 * node. If there is a gap at the beginning the head 1068 * pointer will be changed to point to the new free node. 1069 * If there is free space at the end, the last free extent 1070 * will extend all the way out to the size specified. 
1071 * 1072 * *The list MUST be sorted by offset for this function to work.* 1073 */ 1074 static void 1075 meta_sp_list_freefill( 1076 sp_ext_node_t **head, 1077 sp_ext_length_t size 1078 ) 1079 { 1080 sp_ext_node_t *ext; 1081 sp_ext_offset_t curoff = 0LL; 1082 1083 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1084 if (curoff < ext->ext_offset) 1085 meta_sp_list_insert(NULL, NULL, head, 1086 curoff, ext->ext_offset - curoff, 1087 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1088 curoff = ext->ext_offset + ext->ext_length; 1089 } 1090 1091 /* pad inverse list out to the end */ 1092 if (curoff < size) 1093 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1094 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1095 1096 if (getenv(META_SP_DEBUG)) { 1097 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1098 "holes freefilled:\n"); 1099 meta_sp_list_dump(*head); 1100 } 1101 } 1102 1103 /* 1104 * FUNCTION: meta_sp_list_dump() 1105 * INPUT: head - the head of the list, must be NULL for empty list 1106 * OUTPUT: none 1107 * RETURNS: void 1108 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1109 */ 1110 static void 1111 meta_sp_list_dump(sp_ext_node_t *head) 1112 { 1113 sp_ext_node_t *ext; 1114 1115 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1116 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1117 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1118 "Next"); 1119 for (ext = head; ext != NULL; ext = ext->ext_next) { 1120 if (ext->ext_namep != NULL) 1121 meta_sp_debug("%5s", ext->ext_namep->cname); 1122 else 1123 meta_sp_debug("%5s", "NONE"); 1124 1125 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1126 switch (ext->ext_type) { 1127 case EXTTYP_ALLOC: 1128 meta_sp_debug("%7s ", "ALLOC"); 1129 break; 1130 case EXTTYP_FREE: 1131 meta_sp_debug("%7s ", "FREE"); 1132 break; 1133 case EXTTYP_END: 1134 meta_sp_debug("%7s ", "END"); 1135 break; 1136 case EXTTYP_RESERVED: 1137 
meta_sp_debug("%7s ", "RESV"); 1138 break; 1139 default: 1140 meta_sp_debug("%7s ", "INVLD"); 1141 break; 1142 } 1143 1144 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1145 ext->ext_offset, ext->ext_length, 1146 ext->ext_flags, (void *) ext->ext_prev, 1147 (void *) ext->ext_next); 1148 } 1149 meta_sp_debug("\n"); 1150 } 1151 1152 /* 1153 * FUNCTION: meta_sp_list_overlaps() 1154 * INPUT: head - the head of the list, must be NULL for empty list 1155 * OUTPUT: none 1156 * RETURNS: int - 1 if extents overlap, 0 if ok 1157 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1158 * offset for this function to work properly. 1159 */ 1160 static int 1161 meta_sp_list_overlaps(sp_ext_node_t *head) 1162 { 1163 sp_ext_node_t *ext; 1164 1165 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1166 if (ext->ext_offset + ext->ext_length > 1167 ext->ext_next->ext_offset) 1168 return (1); 1169 } 1170 return (0); 1171 } 1172 1173 /* 1174 * ************************************************************************** 1175 * Extent Allocation Functions * 1176 * ************************************************************************** 1177 */ 1178 1179 /* 1180 * FUNCTION: meta_sp_alloc_by_ext() 1181 * INPUT: sp - the set name for the device the node belongs to 1182 * np - the name of the device the node belongs to 1183 * head - the head of the list, must be NULL for empty list 1184 * free_ext - the free extent being allocated from 1185 * alloc_offset - the offset of the allocation 1186 * alloc_len - the length of the allocation 1187 * seq - the sequence number of the allocation 1188 * OUTPUT: head - the new head pointer 1189 * RETURNS: void 1190 * PURPOSE: allocates a portion of the free extent free_ext. The 1191 * allocated portion starts at alloc_offset and is 1192 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1193 * alloc_length) must be contained within the free extent. 
1194 * 1195 * The free extent is split into as many as 3 pieces - a 1196 * free extent containing [ free_offset .. alloc_offset ), an 1197 * allocated extent containing the range [ alloc_offset .. 1198 * alloc_end ], and another free extent containing the 1199 * range ( alloc_end .. free_end ]. If either of the two 1200 * new free extents would be zero length, they are not created. 1201 * 1202 * Finally, the original free extent is removed. All newly 1203 * created extents have the EXTFLG_UPDATE flag set. 1204 */ 1205 static void 1206 meta_sp_alloc_by_ext( 1207 mdsetname_t *sp, 1208 mdname_t *np, 1209 sp_ext_node_t **head, 1210 sp_ext_node_t *free_ext, 1211 sp_ext_offset_t alloc_offset, 1212 sp_ext_length_t alloc_length, 1213 uint_t seq 1214 ) 1215 { 1216 sp_ext_offset_t free_offset = free_ext->ext_offset; 1217 sp_ext_length_t free_length = free_ext->ext_length; 1218 1219 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1220 sp_ext_offset_t free_end = free_offset + free_length; 1221 1222 /* allocated extent must be a subset of the free extent */ 1223 assert(free_offset <= alloc_offset); 1224 assert(free_end >= alloc_end); 1225 1226 meta_sp_list_remove(head, free_ext); 1227 1228 if (free_offset < alloc_offset) { 1229 meta_sp_list_insert(NULL, NULL, head, free_offset, 1230 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1231 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1232 } 1233 1234 if (free_end > alloc_end) { 1235 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1236 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1237 meta_sp_cmp_by_offset); 1238 } 1239 1240 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1241 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1242 1243 if (getenv(META_SP_DEBUG)) { 1244 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1245 meta_sp_list_dump(*head); 1246 } 1247 } 1248 1249 /* 1250 * FUNCTION: meta_sp_alloc_by_len() 1251 * INPUT: sp - the set name for the device the node belongs to 1252 * np - the 
name of the device the node belongs to 1253 * head - the head of the list, must be NULL for empty list 1254 * *lp - the requested length to allocate 1255 * last_off - the last offset already allocated. 1256 * alignment - the desired extent alignmeent 1257 * OUTPUT: head - the new head pointer 1258 * *lp - the length allocated 1259 * RETURNS: int - -1 if error, the number of new extents on success 1260 * PURPOSE: allocates extents from free space to satisfy the requested 1261 * length. If requested length is zero, allocates all 1262 * remaining free space. This function provides the meat 1263 * of the extent allocation algorithm. Allocation is a 1264 * three tier process: 1265 * 1266 * 1. If last_off is nonzero and there is free space following 1267 * that node, then it is extended to allocate as much of that 1268 * free space as possible. This is useful for metattach. 1269 * 2. If a free extent can be found to satisfy the remaining 1270 * requested space, then satisfy the rest of the request 1271 * from that extent. 1272 * 3. Start allocating space from any remaining free extents until 1273 * the remainder of the request is satisified. 1274 * 1275 * If alignment is non-zero, then every extent modified 1276 * or newly allocated will be aligned modulo alignment, 1277 * with a length that is an integer multiple of 1278 * alignment. 1279 * 1280 * The EXTFLG_UPDATE flag is set for all nodes (free and 1281 * allocated) that require updated watermarks. 1282 * 1283 * This algorithm may have a negative impact on fragmentation 1284 * in pathological cases and may be improved if it turns out 1285 * to be a problem. This may be exacerbated by particularly 1286 * large alignments. 1287 * 1288 * NOTE: It's confusing, so it demands an explanation: 1289 * - len is used to represent requested data space; it 1290 * does not include room for a watermark. 
On each full 1291 * or partial allocation, len will be decremented by 1292 * alloc_len (see next paragraph) until it reaches 1293 * zero. 1294 * - alloc_len is used to represent data space allocated 1295 * from a particular extent; it does not include space 1296 * for a watermark. In the rare event that a_length 1297 * (see next paragraph) is equal to MD_SP_WMSIZE, 1298 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1299 * fragment of space will be utterly unusable. 1300 * - a_length is used to represent all space to be 1301 * allocated from a particular extent; it DOES include 1302 * space for a watermark. 1303 */ 1304 static int 1305 meta_sp_alloc_by_len( 1306 mdsetname_t *sp, 1307 mdname_t *np, 1308 sp_ext_node_t **head, 1309 sp_ext_length_t *lp, 1310 sp_ext_offset_t last_off, 1311 sp_ext_offset_t alignment 1312 ) 1313 { 1314 sp_ext_node_t *free_ext; 1315 sp_ext_node_t *alloc_ext; 1316 uint_t last_seq = 0; 1317 uint_t numexts = 0; 1318 sp_ext_length_t freespace; 1319 sp_ext_length_t alloc_len; 1320 sp_ext_length_t len; 1321 1322 /* We're DOA if we can't read *lp */ 1323 assert(lp != NULL); 1324 len = *lp; 1325 1326 /* 1327 * Process the nominal case first: we've been given an actual 1328 * size argument, rather than the literal "all" 1329 */ 1330 1331 if (len != 0) { 1332 1333 /* 1334 * Short circuit the check for free space. This may 1335 * tell us we have enough space when we really don't 1336 * because each extent loses space to a watermark, but 1337 * it will always tell us there isn't enough space 1338 * correctly. Worst case we do some extra work. 1339 */ 1340 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1341 INCLUDE_WM); 1342 1343 if (freespace < len) 1344 return (-1); 1345 1346 /* 1347 * First see if we can extend the last extent for an 1348 * attach. 
1349 */ 1350 if (last_off != 0LL) { 1351 int align = 0; 1352 1353 alloc_ext = 1354 meta_sp_list_find(*head, last_off); 1355 assert(alloc_ext != NULL); 1356 1357 /* 1358 * The offset test reflects the 1359 * inclusion of the watermark in the extent 1360 */ 1361 align = (alignment > 0) && 1362 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1363 alignment) == 0); 1364 1365 /* 1366 * If we decided not to align here, we should 1367 * also reset "alignment" so we don't bother 1368 * later, either. 1369 */ 1370 if (!align) { 1371 alignment = 0; 1372 } 1373 1374 last_seq = alloc_ext->ext_seq; 1375 1376 free_ext = meta_sp_list_find(*head, 1377 alloc_ext->ext_offset + 1378 alloc_ext->ext_length); 1379 1380 /* 1381 * If a free extent follows our last allocated 1382 * extent, then remove the last allocated 1383 * extent and increase the size of the free 1384 * extent to overlap it, then allocate the 1385 * total space from the new free extent. 1386 */ 1387 if (free_ext != NULL && 1388 free_ext->ext_type == EXTTYP_FREE) { 1389 assert(free_ext->ext_offset == 1390 alloc_ext->ext_offset + 1391 alloc_ext->ext_length); 1392 1393 alloc_len = 1394 MIN(len, free_ext->ext_length); 1395 1396 if (align && (alloc_len < len)) { 1397 /* No watermark space needed */ 1398 alloc_len -= alloc_len % alignment; 1399 } 1400 1401 if (alloc_len > 0) { 1402 free_ext->ext_offset -= 1403 alloc_ext->ext_length; 1404 free_ext->ext_length += 1405 alloc_ext->ext_length; 1406 1407 meta_sp_alloc_by_ext(sp, np, head, 1408 free_ext, free_ext->ext_offset, 1409 alloc_ext->ext_length + alloc_len, 1410 last_seq); 1411 1412 /* 1413 * now remove the original allocated 1414 * node. We may have overlapping 1415 * extents for a short time before 1416 * this node is removed. 1417 */ 1418 meta_sp_list_remove(head, alloc_ext); 1419 len -= alloc_len; 1420 } 1421 } 1422 last_seq++; 1423 } 1424 1425 if (len == 0LL) 1426 goto out; 1427 1428 /* 1429 * Next, see if we can find a single allocation for 1430 * the remainder. 
This may make fragmentation worse 1431 * in some cases, but there's no good way to allocate 1432 * that doesn't have a highly fragmented corner case. 1433 */ 1434 for (free_ext = *head; free_ext != NULL; 1435 free_ext = free_ext->ext_next) { 1436 sp_ext_offset_t a_offset; 1437 sp_ext_offset_t a_length; 1438 1439 if (free_ext->ext_type != EXTTYP_FREE) 1440 continue; 1441 1442 /* 1443 * The length test should include space for 1444 * the watermark 1445 */ 1446 1447 a_offset = free_ext->ext_offset; 1448 a_length = free_ext->ext_length; 1449 1450 if (alignment > 0) { 1451 1452 /* 1453 * Shortcut for extents that have been 1454 * previously added to pad out the 1455 * data space 1456 */ 1457 if (a_length < alignment) { 1458 continue; 1459 } 1460 1461 /* 1462 * Round up so the data space begins 1463 * on a properly aligned boundary. 1464 */ 1465 a_offset += alignment - 1466 (a_offset % alignment) - MD_SP_WMSIZE; 1467 1468 /* 1469 * This is only necessary in case the 1470 * watermark size is ever greater than 1471 * one. It'll never happen, of 1472 * course; we'll get rid of watermarks 1473 * before we make 'em bigger. 1474 */ 1475 if (a_offset < free_ext->ext_offset) { 1476 a_offset += alignment; 1477 } 1478 1479 /* 1480 * Adjust the length to account for 1481 * the space lost above (if any) 1482 */ 1483 a_length -= 1484 (a_offset - free_ext->ext_offset); 1485 } 1486 1487 if (a_length >= len + MD_SP_WMSIZE) { 1488 meta_sp_alloc_by_ext(sp, np, head, 1489 free_ext, a_offset, 1490 len + MD_SP_WMSIZE, last_seq); 1491 1492 len = 0LL; 1493 numexts++; 1494 break; 1495 } 1496 } 1497 1498 if (len == 0LL) 1499 goto out; 1500 1501 1502 /* 1503 * If the request could not be satisfied by extending 1504 * the last extent or by a single extent, then put 1505 * multiple smaller extents together until the request 1506 * is satisfied. 
1507 */ 1508 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1509 free_ext = free_ext->ext_next) { 1510 sp_ext_offset_t a_offset; 1511 sp_ext_length_t a_length; 1512 1513 if (free_ext->ext_type != EXTTYP_FREE) 1514 continue; 1515 1516 a_offset = free_ext->ext_offset; 1517 a_length = free_ext->ext_length; 1518 1519 if (alignment > 0) { 1520 1521 /* 1522 * Shortcut for extents that have been 1523 * previously added to pad out the 1524 * data space 1525 */ 1526 if (a_length < alignment) { 1527 continue; 1528 } 1529 1530 /* 1531 * Round up so the data space begins 1532 * on a properly aligned boundary. 1533 */ 1534 a_offset += alignment - 1535 (a_offset % alignment) - MD_SP_WMSIZE; 1536 1537 /* 1538 * This is only necessary in case the 1539 * watermark size is ever greater than 1540 * one. It'll never happen, of 1541 * course; we'll get rid of watermarks 1542 * before we make 'em bigger. 1543 */ 1544 if (a_offset < free_ext->ext_offset) { 1545 a_offset += alignment; 1546 } 1547 1548 /* 1549 * Adjust the length to account for 1550 * the space lost above (if any) 1551 */ 1552 a_length -= 1553 (a_offset - free_ext->ext_offset); 1554 1555 /* 1556 * Adjust the length to be properly 1557 * aligned if it is NOT to be the 1558 * last extent in the soft partition. 1559 */ 1560 if ((a_length - MD_SP_WMSIZE) < len) 1561 a_length -= 1562 (a_length - MD_SP_WMSIZE) 1563 % alignment; 1564 } 1565 1566 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1567 if (alloc_len == 0) 1568 continue; 1569 1570 /* 1571 * meta_sp_alloc_by_ext() expects the 1572 * allocation length to include the watermark 1573 * size, which is why we don't simply pass in 1574 * alloc_len here. 1575 */ 1576 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1577 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1578 last_seq); 1579 1580 len -= alloc_len; 1581 numexts++; 1582 last_seq++; 1583 } 1584 1585 1586 /* 1587 * If there was not enough space we can throw it all 1588 * away since no real work has been done yet. 
1589 */ 1590 if (len != 0) { 1591 meta_sp_list_free(head); 1592 return (-1); 1593 } 1594 } 1595 1596 /* 1597 * Otherwise, the literal "all" was specified: allocate all 1598 * available free space. Don't bother with alignment. 1599 */ 1600 else { 1601 /* First, extend the last extent if this is a grow */ 1602 if (last_off != 0LL) { 1603 alloc_ext = 1604 meta_sp_list_find(*head, last_off); 1605 assert(alloc_ext != NULL); 1606 1607 last_seq = alloc_ext->ext_seq; 1608 1609 free_ext = meta_sp_list_find(*head, 1610 alloc_ext->ext_offset + 1611 alloc_ext->ext_length); 1612 1613 /* 1614 * If a free extent follows our last allocated 1615 * extent, then remove the last allocated 1616 * extent and increase the size of the free 1617 * extent to overlap it, then allocate the 1618 * total space from the new free extent. 1619 */ 1620 if (free_ext != NULL && 1621 free_ext->ext_type == EXTTYP_FREE) { 1622 assert(free_ext->ext_offset == 1623 alloc_ext->ext_offset + 1624 alloc_ext->ext_length); 1625 1626 len = alloc_len = 1627 free_ext->ext_length; 1628 1629 free_ext->ext_offset -= 1630 alloc_ext->ext_length; 1631 free_ext->ext_length += 1632 alloc_ext->ext_length; 1633 1634 meta_sp_alloc_by_ext(sp, np, head, 1635 free_ext, free_ext->ext_offset, 1636 alloc_ext->ext_length + alloc_len, 1637 last_seq); 1638 1639 /* 1640 * now remove the original allocated 1641 * node. We may have overlapping 1642 * extents for a short time before 1643 * this node is removed. 
1644 */ 1645 meta_sp_list_remove(head, alloc_ext); 1646 } 1647 1648 last_seq++; 1649 } 1650 1651 /* Next, grab all remaining free space */ 1652 for (free_ext = *head; free_ext != NULL; 1653 free_ext = free_ext->ext_next) { 1654 1655 if (free_ext->ext_type == EXTTYP_FREE) { 1656 alloc_len = 1657 free_ext->ext_length - MD_SP_WMSIZE; 1658 if (alloc_len == 0) 1659 continue; 1660 1661 /* 1662 * meta_sp_alloc_by_ext() expects the 1663 * allocation length to include the 1664 * watermark size, which is why we 1665 * don't simply pass in alloc_len 1666 * here. 1667 */ 1668 meta_sp_alloc_by_ext(sp, np, head, 1669 free_ext, free_ext->ext_offset, 1670 free_ext->ext_length, 1671 last_seq); 1672 1673 len += alloc_len; 1674 numexts++; 1675 last_seq++; 1676 } 1677 } 1678 } 1679 1680 out: 1681 if (getenv(META_SP_DEBUG)) { 1682 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1683 "allocation:\n"); 1684 meta_sp_list_dump(*head); 1685 } 1686 1687 if (*lp == 0) { 1688 *lp = len; 1689 1690 /* 1691 * Make sure the callers hit a no space error if we 1692 * didn't actually find anything. 1693 */ 1694 if (len == 0) { 1695 return (-1); 1696 } 1697 } 1698 1699 return (numexts); 1700 } 1701 1702 /* 1703 * FUNCTION: meta_sp_alloc_by_list() 1704 * INPUT: sp - the set name for the device the node belongs to 1705 * np - the name of the device the node belongs to 1706 * head - the head of the list, must be NULL for empty list 1707 * oblist - an extent list containing requested nodes to allocate 1708 * OUTPUT: head - the new head pointer 1709 * RETURNS: int - -1 if error, the number of new extents on success 1710 * PURPOSE: allocates extents from free space to satisfy the requested 1711 * extent list. This is primarily used for the -o/-b options 1712 * where the user may specifically request extents to allocate. 1713 * Each extent in the oblist must be a subset (inclusive) of a 1714 * free extent and may not overlap each other. 
This 1715 * function sets the EXTFLG_UPDATE flag for each node that 1716 * requires a watermark update after allocating. 1717 */ 1718 static int 1719 meta_sp_alloc_by_list( 1720 mdsetname_t *sp, 1721 mdname_t *np, 1722 sp_ext_node_t **head, 1723 sp_ext_node_t *oblist 1724 ) 1725 { 1726 sp_ext_node_t *ext; 1727 sp_ext_node_t *free_ext; 1728 uint_t numexts = 0; 1729 1730 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1731 1732 free_ext = meta_sp_list_find(*head, 1733 ext->ext_offset - MD_SP_WMSIZE); 1734 1735 /* Make sure the allocation is within the free extent */ 1736 if ((free_ext == NULL) || 1737 (ext->ext_offset + ext->ext_length > 1738 free_ext->ext_offset + free_ext->ext_length) || 1739 (free_ext->ext_type != EXTTYP_FREE)) 1740 return (-1); 1741 1742 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1743 ext->ext_offset - MD_SP_WMSIZE, 1744 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1745 1746 numexts++; 1747 } 1748 1749 assert(meta_sp_list_overlaps(*head) == 0); 1750 1751 if (getenv(META_SP_DEBUG)) { 1752 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1753 "allocation:\n"); 1754 meta_sp_list_dump(*head); 1755 } 1756 1757 return (numexts); 1758 } 1759 1760 /* 1761 * ************************************************************************** 1762 * Extent List Population Functions * 1763 * ************************************************************************** 1764 */ 1765 1766 /* 1767 * FUNCTION: meta_sp_extlist_from_namelist() 1768 * INPUT: sp - the set name for the device the node belongs to 1769 * spnplp - the namelist of soft partitions to build a list from 1770 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1771 * ep - return error pointer 1772 * RETURNS: int - -1 if error, 0 on success 1773 * PURPOSE: builds an extent list representing the soft partitions 1774 * specified in the namelist. Each extent in each soft 1775 * partition is added to the list with the type EXTTYP_ALLOC. 
1776 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1777 * extent in the list includes the space occupied by the 1778 * watermark, which is not included in the unit structures. 1779 */ 1780 static int 1781 meta_sp_extlist_from_namelist( 1782 mdsetname_t *sp, 1783 mdnamelist_t *spnlp, 1784 sp_ext_node_t **extlist, 1785 md_error_t *ep 1786 ) 1787 { 1788 int extn; 1789 md_sp_t *msp; /* unit structure of the sp's */ 1790 mdnamelist_t *namep; 1791 1792 assert(sp != NULL); 1793 1794 /* 1795 * Now go through the soft partitions and add a node to the used 1796 * list for each allocated extent. 1797 */ 1798 for (namep = spnlp; namep != NULL; namep = namep->next) { 1799 mdname_t *curnp = namep->namep; 1800 1801 /* get the unit structure */ 1802 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1803 return (-1); 1804 1805 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1806 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1807 1808 /* 1809 * subtract from offset and add to the length 1810 * to account for the watermark, which is not 1811 * contained in the extents in the unit structure. 1812 */ 1813 meta_sp_list_insert(sp, curnp, extlist, 1814 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1815 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1816 } 1817 } 1818 return (0); 1819 } 1820 1821 /* 1822 * FUNCTION: meta_sp_extlist_from_wm() 1823 * INPUT: sp - the set name for the device the node belongs to 1824 * compnp - the name of the device to scan watermarks on 1825 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1826 * ep - return error pointer 1827 * RETURNS: int - -1 if error, 0 on success 1828 * PURPOSE: builds an extent list representing the soft partitions 1829 * specified in the namelist. Each extent in each soft 1830 * partition is added to the list with the type EXTTYP_ALLOC. 1831 * The EXTFLG_UPDATE flag is not set on any nodes. 
Each 1832 * extent in the list includes the space occupied by the 1833 * watermark, which is not included in the unit structures. 1834 */ 1835 static int 1836 meta_sp_extlist_from_wm( 1837 mdsetname_t *sp, 1838 mdname_t *compnp, 1839 sp_ext_node_t **extlist, 1840 ext_cmpfunc_t compare, 1841 md_error_t *ep 1842 ) 1843 { 1844 mp_watermark_t wm; 1845 mdname_t *np = NULL; 1846 mdsetname_t *spsetp = NULL; 1847 sp_ext_offset_t cur_off; 1848 md_set_desc *sd; 1849 int init = 0; 1850 mdkey_t key; 1851 minor_t mnum; 1852 1853 if (!metaislocalset(sp)) { 1854 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1855 return (-1); 1856 } 1857 1858 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1859 return (-1); 1860 1861 for (;;) { 1862 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1863 return (-1); 1864 } 1865 1866 /* get the set and name pointers */ 1867 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1868 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1869 return (-1); 1870 } 1871 } 1872 1873 /* 1874 * For the MN set, meta_init_make_device needs to 1875 * be run on all the nodes so the entries for the 1876 * softpart device name and its comp can be created 1877 * in the same order in the replica namespace. If 1878 * we have it run on mdmn_do_iocset then the mddbs 1879 * will be out of sync between master node and slave 1880 * nodes. 
1881 */ 1882 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1883 1884 if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) { 1885 md_mn_msg_addmdname_t *send_params; 1886 int result; 1887 md_mn_result_t *resp = NULL; 1888 int message_size; 1889 1890 message_size = sizeof (*send_params) + 1891 strlen(wm.wm_mdname) + 1; 1892 send_params = Zalloc(message_size); 1893 send_params->addmdname_setno = sp->setno; 1894 (void) strcpy(&send_params->addmdname_name[0], 1895 wm.wm_mdname); 1896 result = mdmn_send_message(sp->setno, 1897 MD_MN_MSG_ADDMDNAME, 1898 MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, 1899 (char *)send_params, message_size, &resp, 1900 ep); 1901 Free(send_params); 1902 if (resp != NULL) { 1903 if (resp->mmr_exitval != 0) { 1904 free_result(resp); 1905 return (-1); 1906 } 1907 free_result(resp); 1908 } 1909 if (result != 0) 1910 return (-1); 1911 } else { 1912 1913 if (!is_existing_meta_hsp(sp, wm.wm_mdname)) { 1914 if ((key = meta_init_make_device(&sp, 1915 wm.wm_mdname, ep)) <= 0) { 1916 return (-1); 1917 } 1918 init = 1; 1919 } 1920 } 1921 1922 np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep); 1923 if (np == NULL) { 1924 if (init) { 1925 if (meta_getnmentbykey(sp->setno, 1926 MD_SIDEWILD, key, NULL, &mnum, 1927 NULL, ep) != NULL) { 1928 (void) metaioctl(MD_IOCREM_DEV, 1929 &mnum, ep, NULL); 1930 } 1931 (void) del_self_name(sp, key, ep); 1932 } 1933 return (-1); 1934 } 1935 } 1936 1937 /* insert watermark into extent list */ 1938 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1939 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1940 EXTFLG_UPDATE, compare); 1941 1942 /* if we see the end watermark, we're done */ 1943 if (wm.wm_type == EXTTYP_END) 1944 break; 1945 1946 cur_off += wm.wm_length + 1; 1947 1948 /* clear out set and name pointers for next iteration */ 1949 np = NULL; 1950 spsetp = NULL; 1951 } 1952 1953 return (0); 1954 } 1955 1956 /* 1957 * ************************************************************************** 1958 * Print (metastat) 
Functions * 1959 * ************************************************************************** 1960 */ 1961 1962 /* 1963 * FUNCTION: meta_sp_short_print() 1964 * INPUT: msp - the unit structure to display 1965 * fp - the file pointer to send output to 1966 * options - print options from the command line processor 1967 * OUTPUT: ep - return error pointer 1968 * RETURNS: int - -1 if error, 0 on success 1969 * PURPOSE: display a short report of the soft partition in md.tab 1970 * form, primarily used for metastat -p. 1971 */ 1972 static int 1973 meta_sp_short_print( 1974 md_sp_t *msp, 1975 char *fname, 1976 FILE *fp, 1977 mdprtopts_t options, 1978 md_error_t *ep 1979 ) 1980 { 1981 int extn; 1982 1983 if (options & PRINT_LARGEDEVICES) { 1984 if ((msp->common.revision & MD_64BIT_META_DEV) == 0) 1985 return (0); 1986 } 1987 1988 if (options & PRINT_FN) { 1989 if ((msp->common.revision & MD_FN_META_DEV) == 0) 1990 return (0); 1991 } 1992 1993 /* print name and -p */ 1994 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1995 return (mdsyserror(ep, errno, fname)); 1996 1997 /* print the component */ 1998 /* 1999 * Always print the full path name 2000 */ 2001 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 2002 return (mdsyserror(ep, errno, fname)); 2003 2004 /* print out each extent */ 2005 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2006 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2007 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 2008 extp->len) == EOF) 2009 return (mdsyserror(ep, errno, fname)); 2010 } 2011 2012 if (fprintf(fp, "\n") == EOF) 2013 return (mdsyserror(ep, errno, fname)); 2014 2015 /* success */ 2016 return (0); 2017 } 2018 2019 /* 2020 * FUNCTION: meta_sp_status_to_name() 2021 * INPUT: xsp_status - the status value to convert to a string 2022 * tstate - transient errored device state. 
 *			  If set the device is Unavailable
 * OUTPUT:	none
 * RETURNS:	char *	- a pointer to the string representing the status value
 * PURPOSE:	return an internationalized string representing the
 *		status value for a soft partition.  The strings are
 *		strdup'd and must be freed by the caller.  A status
 *		value not handled by the switch yields "Invalid".
 */
static char *
meta_sp_status_to_name(
	xsp_status_t	xsp_status,
	uint_t		tstate
)
{
	char *rval = NULL;

	/*
	 * Check to see if we have MD_INACCESSIBLE set. This is the only valid
	 * value for an 'Unavailable' return. tstate can be set because of
	 * other multi-node reasons (e.g. ABR being set)
	 */
	if (tstate & MD_INACCESSIBLE) {
		return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
	}

	switch (xsp_status) {
	case MD_SP_CREATEPEND:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Creating"));
		break;
	case MD_SP_GROWPEND:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Growing"));
		break;
	case MD_SP_DELPEND:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting"));
		break;
	case MD_SP_OK:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Okay"));
		break;
	case MD_SP_ERR:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Errored"));
		break;
	case MD_SP_RECOVER:
		rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering"));
		break;
	}

	/* rval is still NULL when no case above matched */
	if (rval == NULL)
		rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));

	return (rval);
}

/*
 * FUNCTION:	meta_sp_report()
 * INPUT:	sp	- the set name for the unit being displayed
 *		msp	- the unit structure to display
 *		nlpp	- pass back the large devs
 *		fname	- name handed to mdsyserror() when a write fails
 *		fp	- the file pointer to send output to
 *		options	- print options from the command line processor
 * OUTPUT:	ep	- return error pointer
 * RETURNS:	int	- -1 if error, 0 on success
 * PURPOSE:	print a full report of the device specified: an optional
 *		header, the component, the state, the size, component
 *		details when the component is not itself a metadevice,
 *		and finally a table of the extents.
 */
static int
meta_sp_report(
	mdsetname_t	*sp,
	md_sp_t		*msp,
	mdnamelist_t	**nlpp,
	char		*fname,
	FILE		*fp,
	mdprtopts_t	options,
	md_error_t	*ep
)
{
	uint_t		extn;
	char		*status;
	char		*devid = "";
	mdname_t	*didnp = NULL;
	ddi_devid_t	dtp;
	int		len;
	uint_t		tstate = 0;

	/*
	 * With PRINT_LARGEDEVICES only units carrying the 64-bit
	 * revision bit are reported; for those the device names are
	 * collected through nlpp.
	 */
	if (options & PRINT_LARGEDEVICES) {
		if ((msp->common.revision & MD_64BIT_META_DEV) == 0) {
			return (0);
		} else {
			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
				return (-1);
		}
	}

	/* same filtering for PRINT_FN and the MD_FN_META_DEV bit */
	if (options & PRINT_FN) {
		if ((msp->common.revision & MD_FN_META_DEV) == 0) {
			return (0);
		} else {
			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
				return (-1);
		}
	}

	if (options & PRINT_HEADER) {
		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
		    msp->common.namep->cname) == EOF)
			return (mdsyserror(ep, errno, fname));
	}

	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"),
	    msp->compnamep->cname) == EOF)
		return (mdsyserror(ep, errno, fname));

	/* Determine if device is available before displaying status */
	if (metaismeta(msp->common.namep)) {
		if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
			return (-1);
	}
	/*
	 * NOTE(review): the callee tests MD_INACCESSIBLE, so
	 * MD_DEV_ERRORED is presumably a mask that includes that bit -
	 * confirm against the header that defines it.
	 */
	status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);

	/*
	 * print out "State" to be consistent with other metadevices.
	 * status is heap allocated and is released on every path out of
	 * this if/else.
	 */
	if (tstate & MD_ABR_CAP) {
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    " State: %s - Application Based Recovery (ABR)\n"),
		    status) == EOF) {
			Free(status);
			return (mdsyserror(ep, errno, fname));
		}
	} else {
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    " State: %s\n"), status) == EOF) {
			Free(status);
			return (mdsyserror(ep, errno, fname));
		}
	}
	free(status);

	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"),
	    msp->common.size,
	    meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
		return (mdsyserror(ep, errno, fname));

	/* print component details */
	if (! metaismeta(msp->compnamep)) {
		diskaddr_t	start_blk;
		int		has_mddb;
		char		*has_mddb_str;

		/* print header */
		/*
		 * Building a format string on the fly that will
		 * be used in (f)printf. This allows the length
		 * of the ctd to vary from small to large without
		 * looking horrible.
		 */
		len = strlen(msp->compnamep->cname);
		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
		len += 2;
		if (fprintf(fp,
		    "\t%-*.*s %-12.12s %-5.5s %s\n",
		    len, len,
		    dgettext(TEXT_DOMAIN, "Device"),
		    dgettext(TEXT_DOMAIN, "Start Block"),
		    dgettext(TEXT_DOMAIN, "Dbase"),
		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
			return (mdsyserror(ep, errno, fname));
		}


		/* get info */
		if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
		    MD_DISKADDR_ERROR)
			return (-1);

		if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
			return (-1);

		if (has_mddb)
			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
		else
			has_mddb_str = dgettext(TEXT_DOMAIN, "No");

		/* populate the key in the name_p structure */
		didnp = metadevname(&sp, msp->compnamep->dev, ep);
		if (didnp == NULL) {
			return (-1);
		}

		/*
		 * determine if devid does NOT exist; without
		 * PRINT_DEVID the "Reloc" column stays empty.
		 */
		if (options & PRINT_DEVID) {
			if ((dtp = meta_getdidbykey(sp->setno,
			    getmyside(sp, ep), didnp->key, ep)) == NULL)
				devid = dgettext(TEXT_DOMAIN, "No ");
			else {
				devid = dgettext(TEXT_DOMAIN, "Yes");
				free(dtp);
			}
		}

		/* print info */
		/*
		 * This allows the length
		 * of the ctd to vary from small to large without
		 * looking horrible.
		 */
		if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n",
		    len, msp->compnamep->cname,
		    start_blk, has_mddb_str, devid) == EOF) {
			return (mdsyserror(ep, errno, fname));
		}
		(void) fprintf(fp, "\n");
	}


	/* print the headers */
	if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
	    dgettext(TEXT_DOMAIN, "Extent"),
	    dgettext(TEXT_DOMAIN, "Start Block"),
	    dgettext(TEXT_DOMAIN, "Block count")) == EOF)
		return (mdsyserror(ep, errno, fname));

	/* print out each extent */
	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];

		/* If PRINT_TIMES option is ever supported, add output here */
		if (fprintf(fp, "\t%6u %24llu %24llu\n",
		    extn, extp->poff, extp->len) == EOF)
			return (mdsyserror(ep, errno, fname));
	}

	/* separate records with a newline */
	(void) fprintf(fp, "\n");
	return (0);
}

/*
 * FUNCTION:	meta_sp_print()
 * INPUT:	sp	- the set name for the unit being displayed
 *		np	- the name of the device to print
 *		fname	- ??? not used
 *		fp	- the file pointer to send output to
 *		options	- print options from the command line processor
 * OUTPUT:	ep	- return error pointer
 * RETURNS:	int	- -1 if error, 0 on success
 * PURPOSE:	print a full report of the device specified by metastat.
 *		This is the main entry point for printing.
2266 */ 2267 int 2268 meta_sp_print( 2269 mdsetname_t *sp, 2270 mdname_t *np, 2271 mdnamelist_t **nlpp, 2272 char *fname, 2273 FILE *fp, 2274 mdprtopts_t options, 2275 md_error_t *ep 2276 ) 2277 { 2278 md_sp_t *msp; 2279 md_unit_t *mdp; 2280 int rval = 0; 2281 set_t setno; 2282 minor_t unit; 2283 2284 /* should always have the same set */ 2285 assert(sp != NULL); 2286 2287 /* print all the soft partitions */ 2288 if (np == NULL) { 2289 mdnamelist_t *nlp = NULL; 2290 mdnamelist_t *p; 2291 int cnt; 2292 2293 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2294 return (-1); 2295 else if (cnt == 0) 2296 return (0); 2297 2298 /* recusively print them out */ 2299 for (p = nlp; (p != NULL); p = p->next) { 2300 mdname_t *curnp = p->namep; 2301 2302 /* 2303 * one problem with the rval of -1 here is that 2304 * the error gets "lost" when the next device is 2305 * printed, but we want to print them all anyway. 2306 */ 2307 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2308 options, ep); 2309 } 2310 2311 /* clean up, return success */ 2312 metafreenamelist(nlp); 2313 return (rval); 2314 } 2315 2316 /* get the unit structure */ 2317 if ((msp = meta_get_sp_common(sp, np, 2318 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2319 return (-1); 2320 2321 /* check for parented */ 2322 if ((! (options & PRINT_SUBDEVS)) && 2323 (MD_HAS_PARENT(msp->common.parent))) { 2324 return (0); 2325 } 2326 2327 /* print appropriate detail */ 2328 if (options & PRINT_SHORT) { 2329 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2330 return (-1); 2331 } else { 2332 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2333 return (-1); 2334 } 2335 2336 /* 2337 * Print underlying metadevices if they are parented to us and 2338 * if the info for the underlying metadevice has not been printed. 
2339 */ 2340 if (metaismeta(msp->compnamep)) { 2341 /* get the unit structure for the subdevice */ 2342 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2343 return (-1); 2344 2345 setno = MD_MIN2SET(MD_SID(mdp)); 2346 unit = MD_MIN2UNIT(MD_SID(mdp)); 2347 2348 /* If info not already printed, recurse */ 2349 if (sp_parent_printed[setno] == NULL || 2350 !BT_TEST(sp_parent_printed[setno], unit)) { 2351 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2352 (options | PRINT_HEADER | PRINT_SUBDEVS), 2353 NULL, ep) != 0) { 2354 return (-1); 2355 } 2356 if (sp_parent_printed[setno] == NULL) 2357 sp_parent_printed[setno] = 2358 Zalloc(BT_SIZEOFMAP(MD_MAXUNITS)); 2359 BT_SET(sp_parent_printed[setno], unit); 2360 } 2361 } 2362 return (0); 2363 } 2364 2365 /* 2366 * ************************************************************************** 2367 * Watermark Manipulation Functions * 2368 * ************************************************************************** 2369 */ 2370 2371 /* 2372 * FUNCTION: meta_sp_get_start() 2373 * INPUT: sp - the operating set 2374 * np - device upon which the sp is being built 2375 * OUTPUT: ep - return error pointer 2376 * RETURNS: daddr_t - -1 if error, otherwise the start block 2377 * PURPOSE: Encapsulate the determination of the start block of the 2378 * device upon which the sp is built or being built. 
2379 */ 2380 static diskaddr_t 2381 meta_sp_get_start( 2382 mdsetname_t *sp, 2383 mdname_t *np, 2384 md_error_t *ep 2385 ) 2386 { 2387 daddr_t start_block; 2388 2389 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) 2390 start_block += MD_SP_START; 2391 2392 return (start_block); 2393 } 2394 2395 /* 2396 * FUNCTION: meta_sp_update_wm_common() 2397 * INPUT: sp - the operating set 2398 * msp - a pointer to the XDR unit structure 2399 * extlist - the extent list specifying watermarks to update 2400 * iocval - either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM 2401 * OUTPUT: ep - return error pointer 2402 * RETURNS: int - -1 if error, 0 on success 2403 * PURPOSE: steps backwards through the extent list updating 2404 * watermarks for all extents with the EXTFLG_UPDATE flag 2405 * set. Writing the watermarks guarantees consistency when 2406 * extents must be broken into pieces since the original 2407 * watermark will be the last to be updated, and will be 2408 * changed to point to a new watermark that is already 2409 * known to be consistent. If one of the writes fails, the 2410 * original watermark stays intact and none of the changes 2411 * are realized. 
2412 */ 2413 static int 2414 meta_sp_update_wm_common( 2415 mdsetname_t *sp, 2416 md_sp_t *msp, 2417 sp_ext_node_t *extlist, 2418 int iocval, 2419 md_error_t *ep 2420 ) 2421 { 2422 sp_ext_node_t *ext; 2423 sp_ext_node_t *tail; 2424 mp_watermark_t *wmp, *watermarks; 2425 xsp_offset_t *osp, *offsets; 2426 int update_count = 0; 2427 int rval = 0; 2428 md_unit_t *mdp; 2429 md_sp_update_wm_t update_params; 2430 2431 if (getenv(META_SP_DEBUG)) { 2432 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2433 meta_sp_list_dump(extlist); 2434 } 2435 2436 /* 2437 * find the last node so we can write the watermarks backwards 2438 * and count watermarks to update so we can allocate space 2439 */ 2440 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2441 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2442 update_count++; 2443 } 2444 2445 if (ext->ext_next == NULL) { 2446 tail = ext; 2447 } 2448 } 2449 ext = tail; 2450 2451 wmp = watermarks = 2452 Zalloc(update_count * sizeof (mp_watermark_t)); 2453 osp = offsets = 2454 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2455 2456 while (ext != NULL) { 2457 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2458 /* update watermark */ 2459 wmp->wm_magic = MD_SP_MAGIC; 2460 wmp->wm_version = MD_SP_VERSION; 2461 wmp->wm_type = ext->ext_type; 2462 wmp->wm_seq = ext->ext_seq; 2463 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2464 2465 /* fill in the volume name and set name */ 2466 if (ext->ext_namep != NULL) 2467 (void) strcpy(wmp->wm_mdname, 2468 ext->ext_namep->cname); 2469 else 2470 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2471 if (ext->ext_setp != NULL && 2472 ext->ext_setp->setno != MD_LOCAL_SET) 2473 (void) strcpy(wmp->wm_setname, 2474 ext->ext_setp->setname); 2475 else 2476 (void) strcpy(wmp->wm_setname, 2477 MD_SP_LOCALSETNAME); 2478 2479 /* Generate the checksum */ 2480 wmp->wm_checksum = 0; 2481 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2482 sizeof (*wmp), NULL); 2483 2484 /* record the extent 
offset */ 2485 *osp = ext->ext_offset; 2486 2487 /* Advance the placeholders */ 2488 osp++; wmp++; 2489 } 2490 ext = ext->ext_prev; 2491 } 2492 2493 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2494 if (mdp == NULL) { 2495 rval = -1; 2496 goto out; 2497 } 2498 2499 (void) memset(&update_params, 0, sizeof (update_params)); 2500 update_params.mnum = MD_SID(mdp); 2501 update_params.count = update_count; 2502 update_params.wmp = (uintptr_t)watermarks; 2503 update_params.osp = (uintptr_t)offsets; 2504 MD_SETDRIVERNAME(&update_params, MD_SP, 2505 MD_MIN2SET(update_params.mnum)); 2506 2507 if (metaioctl(iocval, &update_params, &update_params.mde, 2508 msp->common.namep->cname) != 0) { 2509 (void) mdstealerror(ep, &update_params.mde); 2510 rval = -1; 2511 goto out; 2512 } 2513 2514 out: 2515 Free(watermarks); 2516 Free(offsets); 2517 2518 return (rval); 2519 } 2520 2521 static int 2522 meta_sp_update_wm( 2523 mdsetname_t *sp, 2524 md_sp_t *msp, 2525 sp_ext_node_t *extlist, 2526 md_error_t *ep 2527 ) 2528 { 2529 return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM, 2530 ep)); 2531 } 2532 2533 static int 2534 meta_mn_sp_update_wm( 2535 mdsetname_t *sp, 2536 md_sp_t *msp, 2537 sp_ext_node_t *extlist, 2538 md_error_t *ep 2539 ) 2540 { 2541 return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM, 2542 ep)); 2543 } 2544 2545 /* 2546 * FUNCTION: meta_sp_clear_wm() 2547 * INPUT: sp - the operating set 2548 * msp - the unit structure for the soft partition to clear 2549 * OUTPUT: ep - return error pointer 2550 * RETURNS: int - -1 if error, 0 on success 2551 * PURPOSE: steps through the extents for a soft partition unit and 2552 * creates an extent list designed to mark all of the 2553 * watermarks for those extents as free. The extent list 2554 * is then passed to meta_sp_update_wm() to actually write 2555 * the watermarks out. 
2556 */ 2557 static int 2558 meta_sp_clear_wm( 2559 mdsetname_t *sp, 2560 md_sp_t *msp, 2561 md_error_t *ep 2562 ) 2563 { 2564 sp_ext_node_t *extlist = NULL; 2565 int numexts = msp->ext.ext_len; 2566 uint_t i; 2567 int rval = 0; 2568 2569 /* for each watermark must set the flag to SP_FREE */ 2570 for (i = 0; i < numexts; i++) { 2571 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2572 2573 meta_sp_list_insert(NULL, NULL, &extlist, 2574 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2575 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2576 } 2577 2578 /* update watermarks */ 2579 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2580 2581 meta_sp_list_free(&extlist); 2582 return (rval); 2583 } 2584 2585 /* 2586 * FUNCTION: meta_sp_read_wm() 2587 * INPUT: sp - setname for component 2588 * compnp - mdname_t for component 2589 * offset - the offset of the watermark to read (sectors) 2590 * OUTPUT: wm - the watermark structure to read into 2591 * ep - return error pointer 2592 * RETURNS: int - -1 if error, 0 on success 2593 * PURPOSE: seeks out to the requested offset and reads a watermark. 2594 * It then verifies that the magic number is correct and 2595 * that the checksum is valid, returning an error if either 2596 * is wrong. 2597 */ 2598 static int 2599 meta_sp_read_wm( 2600 mdsetname_t *sp, 2601 mdname_t *compnp, 2602 mp_watermark_t *wm, 2603 sp_ext_offset_t offset, 2604 md_error_t *ep 2605 ) 2606 { 2607 md_sp_read_wm_t read_params; 2608 2609 /* 2610 * make sure block offset does not overflow 2^64 bytes and it's a 2611 * multiple of the block size. 
2612 */ 2613 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2614 /* LINTED */ 2615 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2616 2617 (void) memset(wm, 0, sizeof (*wm)); 2618 2619 (void) memset(&read_params, 0, sizeof (read_params)); 2620 read_params.rdev = compnp->dev; 2621 read_params.wmp = (uintptr_t)wm; 2622 read_params.offset = offset; 2623 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2624 2625 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2626 &read_params.mde, compnp->cname) != 0) { 2627 2628 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2629 "Extent header read failed, block %llu.\n"), offset); 2630 return (mdstealerror(ep, &read_params.mde)); 2631 } 2632 2633 /* make sure magic number is correct */ 2634 if (wm->wm_magic != MD_SP_MAGIC) { 2635 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2636 "found incorrect magic number %x, expected %x.\n"), 2637 wm->wm_magic, MD_SP_MAGIC); 2638 /* 2639 * Pass NULL for the device name as we don't have 2640 * valid watermark contents. 2641 */ 2642 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2643 } 2644 2645 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2646 sizeof (*wm), NULL)) { 2647 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2648 "found incorrect checksum %x.\n"), 2649 wm->wm_checksum); 2650 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2651 } 2652 2653 return (0); 2654 } 2655 2656 /* 2657 * ************************************************************************** 2658 * Query Functions 2659 * ************************************************************************** 2660 */ 2661 2662 /* 2663 * IMPORTANT NOTE: This is a static function that assumes that 2664 * its input parameters have been checked and 2665 * have valid values that lie within acceptable 2666 * ranges. 
2667 * 2668 * FUNCTION: meta_sp_enough_space() 2669 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2670 * must be > 0 2671 * desired_sp_size - the desired soft partition size in blocks; 2672 * must be > 0 2673 * extent_listpp - a reference to a reference to an extent 2674 * list that lists the extents on a device; 2675 * must be a reference to a reference to a 2676 * valid extent list 2677 * alignment - the desired data space alignment for the sp's 2678 * OUTPUT: boolean_t return value 2679 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2680 * list to create the desired soft partitions, 2681 * B_FALSE if there's not enough space 2682 * PURPOSE: determines whether there's enough free space in an extent 2683 * list to allow creation of a set of soft partitions 2684 */ 2685 static boolean_t 2686 meta_sp_enough_space( 2687 int desired_number_of_sps, 2688 blkcnt_t desired_sp_size, 2689 sp_ext_node_t **extent_listpp, 2690 sp_ext_length_t alignment 2691 ) 2692 { 2693 boolean_t enough_space; 2694 int number_of_sps; 2695 int number_of_extents_used; 2696 sp_ext_length_t desired_ext_length = desired_sp_size; 2697 2698 enough_space = B_TRUE; 2699 number_of_sps = 0; 2700 while ((enough_space == B_TRUE) && 2701 (number_of_sps < desired_number_of_sps)) { 2702 /* 2703 * Use the extent allocation algorithm implemented by 2704 * meta_sp_alloc_by_len() to test whether the free 2705 * extents in the extent list referenced by *extent_listpp 2706 * contain enough space to accomodate a soft partition 2707 * of size desired_ext_length. 2708 * 2709 * Repeat the test <desired_number_of_sps> times 2710 * or until it fails, whichever comes first, 2711 * each time allocating the extents required to 2712 * create the soft partition without actually 2713 * creating the soft partition. 
2714 */ 2715 number_of_extents_used = meta_sp_alloc_by_len( 2716 TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2717 extent_listpp, &desired_ext_length, 2718 NO_OFFSET, alignment); 2719 if (number_of_extents_used == -1) { 2720 enough_space = B_FALSE; 2721 } else { 2722 number_of_sps++; 2723 } 2724 } 2725 return (enough_space); 2726 } 2727 2728 /* 2729 * IMPORTANT NOTE: This is a static function that calls other functions 2730 * that check its mdsetnamep and device_mdnamep 2731 * input parameters, but expects extent_listpp to 2732 * be a initialized to a valid address to which 2733 * it can write a reference to the extent list that 2734 * it creates. 2735 * 2736 * FUNCTION: meta_sp_get_extent_list() 2737 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2738 * for the set containing the device for 2739 * which the extents are to be listed 2740 * device_mdnamep - a reference to the mdname_t structure 2741 * for the device for which the extents 2742 * are to be listed 2743 * OUTPUT: *extent_listpp - a reference to the extent list for 2744 * the device; NULL if the function fails 2745 * *ep - the libmeta error encountered, if any 2746 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2747 * B_FALSE if not 2748 * PURPOSE: gets the extent list for a device 2749 */ 2750 static boolean_t 2751 meta_sp_get_extent_list( 2752 mdsetname_t *mdsetnamep, 2753 mdname_t *device_mdnamep, 2754 sp_ext_node_t **extent_listpp, 2755 md_error_t *ep 2756 ) 2757 { 2758 diskaddr_t device_size_in_blocks; 2759 mdnamelist_t *sp_name_listp; 2760 diskaddr_t start_block_address_in_blocks; 2761 2762 *extent_listpp = NULL; 2763 sp_name_listp = NULL; 2764 2765 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2766 device_mdnamep, ep); 2767 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2768 if (getenv(META_SP_DEBUG)) { 2769 mde_perror(ep, 2770 "meta_sp_get_extent_list:meta_sp_get_start"); 2771 } 2772 return (B_FALSE); 2773 } 2774 2775 
device_size_in_blocks = metagetsize(device_mdnamep, ep); 2776 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2777 if (getenv(META_SP_DEBUG)) { 2778 mde_perror(ep, 2779 "meta_sp_get_extent_list:metagetsize"); 2780 } 2781 return (B_FALSE); 2782 } 2783 2784 /* 2785 * Sanity check: the start block will have skipped an integer 2786 * number of cylinders, C. C will usually be zero. If (C > 0), 2787 * and the disk slice happens to only be C cylinders in total 2788 * size, we'll fail this check. 2789 */ 2790 if (device_size_in_blocks <= 2791 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2792 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2793 return (B_FALSE); 2794 } 2795 2796 /* 2797 * After this point, we will have allocated resources, so any 2798 * failure returns must be through the supplied "fail" label 2799 * to properly deallocate things. 2800 */ 2801 2802 /* 2803 * Create an empty extent list that starts one watermark past 2804 * the start block of the device and ends one watermark before 2805 * the end of the device. 2806 */ 2807 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2808 extent_listpp, NO_OFFSET, 2809 (sp_ext_length_t)start_block_address_in_blocks, 2810 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2811 meta_sp_cmp_by_offset); 2812 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2813 extent_listpp, (sp_ext_offset_t)(device_size_in_blocks - 2814 MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, 2815 NO_FLAGS, meta_sp_cmp_by_offset); 2816 2817 /* 2818 * Get the list of soft partitions that are already on the 2819 * device. 
2820 */ 2821 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2822 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2823 if (getenv(META_SP_DEBUG)) { 2824 mde_perror(ep, 2825 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2826 } 2827 goto fail; 2828 } 2829 2830 if (sp_name_listp != NULL) { 2831 /* 2832 * If there are soft partitions on the device, add the 2833 * extents used in them to the extent list. 2834 */ 2835 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2836 extent_listpp, ep) == -1) { 2837 if (getenv(META_SP_DEBUG)) { 2838 mde_perror(ep, "meta_sp_get_extent_list:" 2839 "meta_sp_extlist_from_namelist"); 2840 } 2841 goto fail; 2842 } 2843 metafreenamelist(sp_name_listp); 2844 } 2845 2846 /* 2847 * Add free extents to the extent list to represent 2848 * the remaining regions of free space on the 2849 * device. 2850 */ 2851 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2852 return (B_TRUE); 2853 2854 fail: 2855 if (sp_name_listp != NULL) { 2856 metafreenamelist(sp_name_listp); 2857 } 2858 2859 if (*extent_listpp != NULL) { 2860 /* 2861 * meta_sp_list_free sets *extent_listpp to NULL. 2862 */ 2863 meta_sp_list_free(extent_listpp); 2864 } 2865 return (B_FALSE); 2866 } 2867 2868 /* 2869 * IMPORTANT NOTE: This is a static function that calls other functions 2870 * that check its mdsetnamep and mddrivenamep 2871 * input parameters, but expects extent_listpp to 2872 * be a initialized to a valid address to which 2873 * it can write a reference to the extent list that 2874 * it creates. 
2875 * 2876 * FUNCTION: meta_sp_get_extent_list_for_drive() 2877 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2878 * for the set containing the drive for 2879 * which the extents are to be listed 2880 * mddrivenamep - a reference to the mddrivename_t structure 2881 * for the drive for which the extents 2882 * are to be listed 2883 * OUTPUT: *extent_listpp - a reference to the extent list for 2884 * the drive; NULL if the function fails 2885 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2886 * B_FALSE if not 2887 * PURPOSE: gets the extent list for a drive when the entire drive 2888 * is to be soft partitioned 2889 */ 2890 static boolean_t 2891 meta_sp_get_extent_list_for_drive( 2892 mdsetname_t *mdsetnamep, 2893 mddrivename_t *mddrivenamep, 2894 sp_ext_node_t **extent_listpp 2895 ) 2896 { 2897 boolean_t can_use; 2898 diskaddr_t free_space; 2899 md_error_t mderror; 2900 mdvtoc_t proposed_vtoc; 2901 int repartition_options; 2902 int return_value; 2903 md_sp_t test_sp_struct; 2904 2905 can_use = B_TRUE; 2906 *extent_listpp = NULL; 2907 mderror = mdnullerror; 2908 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2909 &mderror); 2910 if (test_sp_struct.compnamep == NULL) { 2911 can_use = B_FALSE; 2912 } 2913 2914 if (can_use == B_TRUE) { 2915 mderror = mdnullerror; 2916 repartition_options = 0; 2917 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2918 MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror); 2919 if (return_value != 0) { 2920 can_use = B_FALSE; 2921 } 2922 } 2923 2924 if (can_use == B_TRUE) { 2925 mderror = mdnullerror; 2926 repartition_options = repartition_options | 2927 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2928 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2929 repartition_options, &proposed_vtoc, &mderror); 2930 if (return_value != 0) { 2931 can_use = B_FALSE; 2932 } 2933 } 2934 2935 if (can_use == B_TRUE) { 2936 free_space = proposed_vtoc.parts[MD_SLICE0].size; 
2937 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2938 can_use = B_FALSE; 2939 } 2940 } 2941 2942 if (can_use == B_TRUE) { 2943 /* 2944 * Create an extent list that starts with 2945 * a reserved extent that ends at the start 2946 * of the usable space on slice zero of the 2947 * proposed VTOC, ends with an extent that 2948 * reserves space for a watermark at the end 2949 * of slice zero, and contains a single free 2950 * extent that occupies the rest of the space 2951 * on the slice. 2952 * 2953 * NOTE: 2954 * 2955 * Don't use metagetstart() or metagetsize() to 2956 * find the usable space. They query the mdname_t 2957 * structure that represents an actual device to 2958 * determine the amount of space on the device that 2959 * contains metadata and the total amount of space 2960 * on the device. Since this function creates a 2961 * proposed extent list that doesn't reflect the 2962 * state of an actual device, there's no mdname_t 2963 * structure to be queried. 2964 * 2965 * When a drive is reformatted to prepare for 2966 * soft partitioning, all of slice seven is 2967 * reserved for metadata, all of slice zero is 2968 * available for soft partitioning, and all other 2969 * slices on the drive are empty. The proposed 2970 * extent list for the drive therefore contains 2971 * only three extents: a reserved extent that ends 2972 * at the start of the usable space on slice zero, 2973 * a single free extent that occupies all the usable 2974 * space on slice zero, and an ending extent that 2975 * reserves space for a watermark at the end of 2976 * slice zero. 
2977 */ 2978 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2979 extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START), 2980 EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS, 2981 meta_sp_cmp_by_offset); 2982 meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP, 2983 extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2984 MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS, 2985 meta_sp_cmp_by_offset); 2986 meta_sp_list_freefill(extent_listpp, free_space); 2987 } 2988 return (can_use); 2989 } 2990 2991 /* 2992 * FUNCTION: meta_sp_can_create_sps() 2993 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2994 * for the set containing the device for 2995 * which the extents are to be listed 2996 * mdnamep - a reference to the mdname_t of the device 2997 * on which the soft parititions are to be created 2998 * number_of_sps - the desired number of soft partitions 2999 * sp_size - the desired soft partition size 3000 * OUTPUT: boolean_t return value 3001 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3002 * B_FALSE if not 3003 * PURPOSE: determines whether a set of soft partitions can be created 3004 * on a device 3005 */ 3006 boolean_t 3007 meta_sp_can_create_sps( 3008 mdsetname_t *mdsetnamep, 3009 mdname_t *mdnamep, 3010 int number_of_sps, 3011 blkcnt_t sp_size 3012 ) 3013 { 3014 sp_ext_node_t *extent_listp; 3015 boolean_t succeeded; 3016 md_error_t mde; 3017 3018 if ((number_of_sps > 0) && (sp_size > 0)) { 3019 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3020 &extent_listp, &mde); 3021 } else { 3022 succeeded = B_FALSE; 3023 } 3024 3025 /* 3026 * We don't really care about an error return from the 3027 * alignment call; that will just result in passing zero, 3028 * which will be interpreted as no alignment. 
3029 */ 3030 3031 if (succeeded == B_TRUE) { 3032 succeeded = meta_sp_enough_space(number_of_sps, 3033 sp_size, &extent_listp, 3034 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 3035 meta_sp_list_free(&extent_listp); 3036 } 3037 return (succeeded); 3038 } 3039 3040 /* 3041 * FUNCTION: meta_sp_can_create_sps_on_drive() 3042 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3043 * for the set containing the drive for 3044 * which the extents are to be listed 3045 * mddrivenamep - a reference to the mddrivename_t of the drive 3046 * on which the soft parititions are to be created 3047 * number_of_sps - the desired number of soft partitions 3048 * sp_size - the desired soft partition size 3049 * OUTPUT: boolean_t return value 3050 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 3051 * B_FALSE if not 3052 * PURPOSE: determines whether a set of soft partitions can be created 3053 * on a drive if the entire drive is soft partitioned 3054 */ 3055 boolean_t 3056 meta_sp_can_create_sps_on_drive( 3057 mdsetname_t *mdsetnamep, 3058 mddrivename_t *mddrivenamep, 3059 int number_of_sps, 3060 blkcnt_t sp_size 3061 ) 3062 { 3063 sp_ext_node_t *extent_listp; 3064 boolean_t succeeded; 3065 3066 if ((number_of_sps > 0) && (sp_size > 0)) { 3067 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3068 mddrivenamep, &extent_listp); 3069 } else { 3070 succeeded = B_FALSE; 3071 } 3072 3073 /* 3074 * We don't care about alignment on the space call because 3075 * we're specifically dealing with a drive, which will have no 3076 * inherent alignment. 
3077 */ 3078 3079 if (succeeded == B_TRUE) { 3080 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3081 &extent_listp, SP_UNALIGNED); 3082 meta_sp_list_free(&extent_listp); 3083 } 3084 return (succeeded); 3085 } 3086 3087 /* 3088 * FUNCTION: meta_sp_get_free_space() 3089 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3090 * for the set containing the device for 3091 * which the free space is to be returned 3092 * mdnamep - a reference to the mdname_t of the device 3093 * for which the free space is to be returned 3094 * OUTPUT: blkcnt_t return value 3095 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3096 * PURPOSE: returns the number of blocks of free space on a device 3097 */ 3098 blkcnt_t 3099 meta_sp_get_free_space( 3100 mdsetname_t *mdsetnamep, 3101 mdname_t *mdnamep 3102 ) 3103 { 3104 sp_ext_node_t *extent_listp; 3105 sp_ext_length_t free_blocks; 3106 boolean_t succeeded; 3107 md_error_t mde; 3108 3109 extent_listp = NULL; 3110 free_blocks = 0; 3111 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3112 &extent_listp, &mde); 3113 if (succeeded == B_TRUE) { 3114 free_blocks = meta_sp_list_size(extent_listp, 3115 EXTTYP_FREE, INCLUDE_WM); 3116 meta_sp_list_free(&extent_listp); 3117 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3118 /* 3119 * Subtract a safety margin for watermarks when 3120 * computing the number of blocks available for 3121 * use. The actual number of watermarks can't 3122 * be calculated without knowing the exact numbers 3123 * and sizes of both the free extents and the soft 3124 * partitions to be created. The calculation is 3125 * highly complex and error-prone even if those 3126 * quantities are known. The approximate value 3127 * 10 * MD_SP_WMSIZE is within a few blocks of the 3128 * correct value in all practical cases. 
3129 */ 3130 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3131 } else { 3132 free_blocks = 0; 3133 } 3134 } else { 3135 mdclrerror(&mde); 3136 } 3137 3138 return (free_blocks); 3139 } 3140 3141 /* 3142 * FUNCTION: meta_sp_get_free_space_on_drive() 3143 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3144 * for the set containing the drive for 3145 * which the free space is to be returned 3146 * mddrivenamep - a reference to the mddrivename_t of the drive 3147 * for which the free space is to be returned 3148 * OUTPUT: blkcnt_t return value 3149 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3150 * PURPOSE: returns the number of blocks of space usable for soft 3151 * partitions on an entire drive, if the entire drive is 3152 * soft partitioned 3153 */ 3154 blkcnt_t 3155 meta_sp_get_free_space_on_drive( 3156 mdsetname_t *mdsetnamep, 3157 mddrivename_t *mddrivenamep 3158 ) 3159 { 3160 sp_ext_node_t *extent_listp; 3161 sp_ext_length_t free_blocks; 3162 boolean_t succeeded; 3163 3164 extent_listp = NULL; 3165 free_blocks = 0; 3166 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3167 mddrivenamep, &extent_listp); 3168 if (succeeded == B_TRUE) { 3169 free_blocks = meta_sp_list_size(extent_listp, 3170 EXTTYP_FREE, INCLUDE_WM); 3171 meta_sp_list_free(&extent_listp); 3172 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3173 /* 3174 * Subtract a safety margin for watermarks when 3175 * computing the number of blocks available for 3176 * use. The actual number of watermarks can't 3177 * be calculated without knowing the exact numbers 3178 * and sizes of both the free extents and the soft 3179 * partitions to be created. The calculation is 3180 * highly complex and error-prone even if those 3181 * quantities are known. The approximate value 3182 * 10 * MD_SP_WMSIZE is within a few blocks of the 3183 * correct value in all practical cases. 
3184 */ 3185 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3186 } else { 3187 free_blocks = 0; 3188 } 3189 } 3190 return (free_blocks); 3191 } 3192 3193 /* 3194 * FUNCTION: meta_sp_get_number_of_possible_sps() 3195 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3196 * for the set containing the device for 3197 * which the number of possible soft partitions 3198 * is to be returned 3199 * mdnamep - a reference to the mdname_t of the device 3200 * for which the number of possible soft partitions 3201 * is to be returned 3202 * OUTPUT: int return value 3203 * RETURNS: int - the number of soft partitions of the desired size 3204 * that can be created on the device 3205 * PURPOSE: returns the number of soft partitions of a given size 3206 * that can be created on a device 3207 */ 3208 int 3209 meta_sp_get_number_of_possible_sps( 3210 mdsetname_t *mdsetnamep, 3211 mdname_t *mdnamep, 3212 blkcnt_t sp_size 3213 ) 3214 { 3215 sp_ext_node_t *extent_listp; 3216 int number_of_possible_sps; 3217 boolean_t succeeded; 3218 md_error_t mde; 3219 sp_ext_length_t alignment; 3220 3221 extent_listp = NULL; 3222 number_of_possible_sps = 0; 3223 if (sp_size > 0) { 3224 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3225 mdnamep, &extent_listp, &mde)) == B_FALSE) 3226 mdclrerror(&mde); 3227 } else { 3228 succeeded = B_FALSE; 3229 } 3230 3231 if (succeeded == B_TRUE) { 3232 alignment = meta_sp_get_default_alignment(mdsetnamep, 3233 mdnamep, &mde); 3234 } 3235 3236 while (succeeded == B_TRUE) { 3237 /* 3238 * Keep allocating space from the extent list 3239 * for soft partitions of the desired size until 3240 * there's not enough free space left in the list 3241 * for another soft partiition of that size. 3242 * Add one to the number of possible soft partitions 3243 * for each soft partition for which there is 3244 * enough free space left. 
3245 */ 3246 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3247 sp_size, &extent_listp, alignment); 3248 if (succeeded == B_TRUE) { 3249 number_of_possible_sps++; 3250 } 3251 } 3252 if (extent_listp != NULL) { 3253 meta_sp_list_free(&extent_listp); 3254 } 3255 return (number_of_possible_sps); 3256 } 3257 3258 /* 3259 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3260 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3261 * for the set containing the drive for 3262 * which the number of possible soft partitions 3263 * is to be returned 3264 * mddrivenamep - a reference to the mddrivename_t of the drive 3265 * for which the number of possible soft partitions 3266 * is to be returned 3267 * sp_size - the size in blocks of the proposed soft partitions 3268 * OUTPUT: int return value 3269 * RETURNS: int - the number of soft partitions of the desired size 3270 * that can be created on the drive 3271 * PURPOSE: returns the number of soft partitions of a given size 3272 * that can be created on a drive, if the entire drive is 3273 * soft partitioned 3274 */ 3275 int 3276 meta_sp_get_number_of_possible_sps_on_drive( 3277 mdsetname_t *mdsetnamep, 3278 mddrivename_t *mddrivenamep, 3279 blkcnt_t sp_size 3280 ) 3281 { 3282 sp_ext_node_t *extent_listp; 3283 int number_of_possible_sps; 3284 boolean_t succeeded; 3285 3286 extent_listp = NULL; 3287 number_of_possible_sps = 0; 3288 if (sp_size > 0) { 3289 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3290 mddrivenamep, &extent_listp); 3291 } else { 3292 succeeded = B_FALSE; 3293 } 3294 while (succeeded == B_TRUE) { 3295 /* 3296 * Keep allocating space from the extent list 3297 * for soft partitions of the desired size until 3298 * there's not enough free space left in the list 3299 * for another soft partition of that size. 3300 * Add one to the number of possible soft partitions 3301 * for each soft partition for which there is 3302 * enough free space left. 
3303 * 3304 * Since it's a drive, not a metadevice, make no 3305 * assumptions about alignment. 3306 */ 3307 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3308 sp_size, &extent_listp, SP_UNALIGNED); 3309 if (succeeded == B_TRUE) { 3310 number_of_possible_sps++; 3311 } 3312 } 3313 if (extent_listp != NULL) { 3314 meta_sp_list_free(&extent_listp); 3315 } 3316 return (number_of_possible_sps); 3317 } 3318 3319 /* 3320 * FUNCTION: meta_sp_get_possible_sp_size() 3321 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3322 * for the set containing the device for 3323 * which the possible soft partition size 3324 * is to be returned 3325 * mdnamep - a reference to the mdname_t of the device 3326 * for which the possible soft partition size 3327 * is to be returned 3328 * number_of_sps - the desired number of soft partitions 3329 * OUTPUT: blkcnt_t return value 3330 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3331 * PURPOSE: returns the maximum possible size of each of a given number of 3332 * soft partitions of equal size that can be created on a device 3333 */ 3334 blkcnt_t 3335 meta_sp_get_possible_sp_size( 3336 mdsetname_t *mdsetnamep, 3337 mdname_t *mdnamep, 3338 int number_of_sps 3339 ) 3340 { 3341 blkcnt_t free_blocks; 3342 blkcnt_t sp_size; 3343 boolean_t succeeded; 3344 3345 sp_size = 0; 3346 if (number_of_sps > 0) { 3347 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3348 sp_size = free_blocks / number_of_sps; 3349 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3350 number_of_sps, sp_size); 3351 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3352 /* 3353 * To compensate for space that may have been 3354 * occupied by watermarks, reduce sp_size by a 3355 * number of blocks equal to the number of soft 3356 * partitions desired, and test again to see 3357 * whether the desired number of soft partitions 3358 * can be created. 
3359 */ 3360 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3361 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3362 number_of_sps, sp_size); 3363 } 3364 if (sp_size < 0) { 3365 sp_size = 0; 3366 } 3367 } 3368 return (sp_size); 3369 } 3370 3371 /* 3372 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3373 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3374 * for the set containing the drive for 3375 * which the possible soft partition size 3376 * is to be returned 3377 * mddrivenamep - a reference to the mddrivename_t of the drive 3378 * for which the possible soft partition size 3379 * is to be returned 3380 * number_of_sps - the desired number of soft partitions 3381 * OUTPUT: blkcnt_t return value 3382 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3383 * PURPOSE: returns the maximum possible size of each of a given number of 3384 * soft partitions of equal size that can be created on a drive 3385 * if the entire drive is soft partitioned 3386 */ 3387 blkcnt_t 3388 meta_sp_get_possible_sp_size_on_drive( 3389 mdsetname_t *mdsetnamep, 3390 mddrivename_t *mddrivenamep, 3391 int number_of_sps 3392 ) 3393 { 3394 blkcnt_t free_blocks; 3395 blkcnt_t sp_size; 3396 boolean_t succeeded; 3397 3398 sp_size = 0; 3399 if (number_of_sps > 0) { 3400 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3401 mddrivenamep); 3402 sp_size = free_blocks / number_of_sps; 3403 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3404 mddrivenamep, number_of_sps, sp_size); 3405 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3406 /* 3407 * To compensate for space that may have been 3408 * occupied by watermarks, reduce sp_size by a 3409 * number of blocks equal to the number of soft 3410 * partitions desired, and test again to see 3411 * whether the desired number of soft partitions 3412 * can be created. 
3413 */ 3414 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3415 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3416 mddrivenamep, number_of_sps, sp_size); 3417 } 3418 if (sp_size < 0) { 3419 sp_size = 0; 3420 } 3421 } 3422 return (sp_size); 3423 } 3424 3425 /* 3426 * ************************************************************************** 3427 * Unit Structure Manipulation Functions * 3428 * ************************************************************************** 3429 */ 3430 3431 /* 3432 * FUNCTION: meta_sp_fillextarray() 3433 * INPUT: mp - the unit structure to fill 3434 * extlist - the list of extents to fill with 3435 * OUTPUT: none 3436 * RETURNS: void 3437 * PURPOSE: fills in the unit structure extent list with the extents 3438 * specified by extlist. Only extents in extlist with the 3439 * EXTFLG_UPDATE flag are changed in the unit structure, 3440 * and the index into the unit structure is the sequence 3441 * number in the extent list. After all of the nodes have 3442 * been updated the virtual offsets in the unit structure 3443 * are updated to reflect the new lengths. 
3444 */ 3445 static void 3446 meta_sp_fillextarray( 3447 mp_unit_t *mp, 3448 sp_ext_node_t *extlist 3449 ) 3450 { 3451 int i; 3452 sp_ext_node_t *ext; 3453 sp_ext_offset_t curvoff = 0LL; 3454 3455 assert(mp != NULL); 3456 3457 /* go through the allocation list and fill in our unit structure */ 3458 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3459 if ((ext->ext_type == EXTTYP_ALLOC) && 3460 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3461 mp->un_ext[ext->ext_seq].un_poff = 3462 ext->ext_offset + MD_SP_WMSIZE; 3463 mp->un_ext[ext->ext_seq].un_len = 3464 ext->ext_length - MD_SP_WMSIZE; 3465 } 3466 } 3467 3468 for (i = 0; i < mp->un_numexts; i++) { 3469 assert(mp->un_ext[i].un_poff != 0); 3470 assert(mp->un_ext[i].un_len != 0); 3471 mp->un_ext[i].un_voff = curvoff; 3472 curvoff += mp->un_ext[i].un_len; 3473 } 3474 } 3475 3476 /* 3477 * FUNCTION: meta_sp_createunit() 3478 * INPUT: np - the name of the device to create a unit structure for 3479 * compnp - the name of the device the soft partition is on 3480 * extlist - the extent list to populate the new unit with 3481 * numexts - the number of extents in the extent list 3482 * len - the total size of the soft partition (sectors) 3483 * status - the initial status of the unit structure 3484 * OUTPUT: ep - return error pointer 3485 * RETURNS: mp_unit_t * - the new unit structure. 3486 * PURPOSE: allocates and fills in a new soft partition unit 3487 * structure to be passed to the soft partitioning driver 3488 * for creation. 
3489 */ 3490 static mp_unit_t * 3491 meta_sp_createunit( 3492 mdname_t *np, 3493 mdname_t *compnp, 3494 sp_ext_node_t *extlist, 3495 int numexts, 3496 sp_ext_length_t len, 3497 sp_status_t status, 3498 md_error_t *ep 3499 ) 3500 { 3501 mp_unit_t *mp; 3502 uint_t ms_size; 3503 3504 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3505 (numexts * sizeof (mp->un_ext[0])); 3506 3507 mp = Zalloc(ms_size); 3508 3509 /* fill in fields in common unit structure */ 3510 mp->c.un_type = MD_METASP; 3511 mp->c.un_size = ms_size; 3512 MD_SID(mp) = meta_getminor(np->dev); 3513 mp->c.un_total_blocks = len; 3514 mp->c.un_actual_tb = len; 3515 3516 /* set up geometry */ 3517 (void) meta_sp_setgeom(np, compnp, mp, ep); 3518 3519 /* if we're building on metadevice we can't parent */ 3520 if (metaismeta(compnp)) 3521 MD_CAPAB(mp) = MD_CANT_PARENT; 3522 else 3523 MD_CAPAB(mp) = MD_CAN_PARENT; 3524 3525 /* fill soft partition-specific fields */ 3526 mp->un_dev = compnp->dev; 3527 mp->un_key = compnp->key; 3528 3529 /* mdname_t start_blk field is not 64-bit! */ 3530 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3531 mp->un_status = status; 3532 mp->un_numexts = numexts; 3533 mp->un_length = len; 3534 3535 /* fill in the extent array */ 3536 meta_sp_fillextarray(mp, extlist); 3537 3538 return (mp); 3539 } 3540 3541 /* 3542 * FUNCTION: meta_sp_updateunit() 3543 * INPUT: np - name structure for the metadevice being updated 3544 * old_un - the original unit structure that is being updated 3545 * extlist - the extent list to populate the new unit with 3546 * grow_len - the amount by which the partition is being grown 3547 * numexts - the number of extents in the extent list 3548 * ep - return error pointer 3549 * OUTPUT: none 3550 * RETURNS: mp_unit_t * - the updated unit structure 3551 * PURPOSE: allocates and fills in a new soft partition unit structure to 3552 * be passed to the soft partitioning driver for creation. 
The 3553 * old unit structure is first copied in, and then the updated 3554 * extents are changed in the new unit structure. This is 3555 * typically used when the size of an existing unit is changed. 3556 */ 3557 static mp_unit_t * 3558 meta_sp_updateunit( 3559 mdname_t *np, 3560 mp_unit_t *old_un, 3561 sp_ext_node_t *extlist, 3562 sp_ext_length_t grow_len, 3563 int numexts, 3564 md_error_t *ep 3565 ) 3566 { 3567 mp_unit_t *new_un; 3568 sp_ext_length_t new_len; 3569 uint_t new_size; 3570 3571 assert(old_un != NULL); 3572 assert(extlist != NULL); 3573 3574 /* allocate new unit structure and copy in old unit */ 3575 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3576 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3577 new_len = old_un->un_length + grow_len; 3578 new_un = Zalloc(new_size); 3579 bcopy(old_un, new_un, old_un->c.un_size); 3580 3581 /* update size and geometry information */ 3582 new_un->c.un_size = new_size; 3583 new_un->un_length = new_len; 3584 new_un->c.un_total_blocks = new_len; 3585 new_un->c.un_actual_tb = new_len; 3586 if (meta_adjust_geom((md_unit_t *)new_un, np, 3587 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3588 0, ep) != 0) { 3589 Free(new_un); 3590 return (NULL); 3591 } 3592 3593 /* update extent information */ 3594 new_un->un_numexts += numexts; 3595 3596 meta_sp_fillextarray(new_un, extlist); 3597 3598 return (new_un); 3599 } 3600 3601 /* 3602 * FUNCTION: meta_get_sp() 3603 * INPUT: sp - the set name for the device to get 3604 * np - the name of the device to get 3605 * OUTPUT: ep - return error pointer 3606 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3607 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3608 * for the named device. Just a wrapper for meta_get_sp_common(). 
3609 */ 3610 md_sp_t * 3611 meta_get_sp( 3612 mdsetname_t *sp, 3613 mdname_t *np, 3614 md_error_t *ep 3615 ) 3616 { 3617 return (meta_get_sp_common(sp, np, 0, ep)); 3618 } 3619 3620 /* 3621 * FUNCTION: meta_get_sp_common() 3622 * INPUT: sp - the set name for the device to get 3623 * np - the name of the device to get 3624 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3625 * OUTPUT: ep - return error pointer 3626 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3627 * NULL if np is not a soft partition 3628 * PURPOSE: common routine for fetching a soft partition unit structure 3629 */ 3630 md_sp_t * 3631 meta_get_sp_common( 3632 mdsetname_t *sp, 3633 mdname_t *np, 3634 int fast, 3635 md_error_t *ep 3636 ) 3637 { 3638 mddrivename_t *dnp = np->drivenamep; 3639 char *miscname; 3640 mp_unit_t *mp; 3641 md_sp_t *msp; 3642 int i; 3643 3644 /* must have set */ 3645 assert(sp != NULL); 3646 3647 /* short circuit */ 3648 if (dnp->unitp != NULL) { 3649 if (dnp->unitp->type != MD_METASP) 3650 return (NULL); 3651 return ((md_sp_t *)dnp->unitp); 3652 } 3653 /* get miscname and unit */ 3654 if ((miscname = metagetmiscname(np, ep)) == NULL) 3655 return (NULL); 3656 3657 if (strcmp(miscname, MD_SP) != 0) { 3658 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3659 return (NULL); 3660 } 3661 3662 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3663 return (NULL); 3664 3665 assert(mp->c.un_type == MD_METASP); 3666 3667 /* allocate soft partition */ 3668 msp = Zalloc(sizeof (*msp)); 3669 3670 /* get the common information */ 3671 msp->common.namep = np; 3672 msp->common.type = mp->c.un_type; 3673 msp->common.state = mp->c.un_status; 3674 msp->common.capabilities = mp->c.un_capabilities; 3675 msp->common.parent = mp->c.un_parent; 3676 msp->common.size = mp->c.un_total_blocks; 3677 msp->common.user_flags = mp->c.un_user_flags; 3678 msp->common.revision = mp->c.un_revision; 3679 3680 /* get soft partition information */ 3681 if 
((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3682 goto out; 3683 3684 /* 3685 * Fill in the key and the start block. Note that the start 3686 * block in the unit structure is 64 bits but the name pointer 3687 * only supports 32 bits. 3688 */ 3689 msp->compnamep->key = mp->un_key; 3690 msp->compnamep->start_blk = mp->un_start_blk; 3691 3692 /* fill in status field */ 3693 msp->status = mp->un_status; 3694 3695 /* allocate the extents */ 3696 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3697 msp->ext.ext_len = mp->un_numexts; 3698 3699 /* do the extents for this soft partition */ 3700 for (i = 0; i < mp->un_numexts; i++) { 3701 struct mp_ext *mde = &mp->un_ext[i]; 3702 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3703 3704 extp->voff = mde->un_voff; 3705 extp->poff = mde->un_poff; 3706 extp->len = mde->un_len; 3707 } 3708 3709 /* cleanup, return success */ 3710 Free(mp); 3711 dnp->unitp = (md_common_t *)msp; 3712 return (msp); 3713 3714 out: 3715 /* clean up and return error */ 3716 Free(mp); 3717 Free(msp); 3718 return (NULL); 3719 } 3720 3721 3722 /* 3723 * FUNCTION: meta_init_sp() 3724 * INPUT: spp - the set name for the new device 3725 * argc - the remaining argument count for the metainit cmdline 3726 * argv - the remainder of the unparsed command line 3727 * options - global options parsed by metainit 3728 * OUTPUT: ep - return error pointer 3729 * RETURNS: int - -1 failure, 0 success 3730 * PURPOSE: provides the command line parsing and name management overhead 3731 * for creating a new soft partition. Ultimately this calls 3732 * meta_create_sp() which does the real work of allocating space 3733 * for the new soft partition. 
3734 */ 3735 int 3736 meta_init_sp( 3737 mdsetname_t **spp, 3738 int argc, 3739 char *argv[], 3740 mdcmdopts_t options, 3741 md_error_t *ep 3742 ) 3743 { 3744 char *compname = NULL; 3745 mdname_t *spcompnp = NULL; /* name of component volume */ 3746 char *devname = argv[0]; /* unit name */ 3747 mdname_t *np = NULL; /* name of soft partition */ 3748 md_sp_t *msp = NULL; 3749 int c; 3750 int old_optind; 3751 sp_ext_length_t len = 0LL; 3752 int rval = -1; 3753 uint_t seq; 3754 int oflag; 3755 int failed; 3756 mddrivename_t *dnp = NULL; 3757 sp_ext_length_t alignment = 0LL; 3758 sp_ext_node_t *extlist = NULL; 3759 3760 assert(argc > 0); 3761 3762 /* expect sp name, -p, optional -e, compname, and size parameters */ 3763 /* grab soft partition name */ 3764 if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL) 3765 goto out; 3766 3767 /* see if it exists already */ 3768 if (metagetmiscname(np, ep) != NULL) { 3769 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3770 meta_getminor(np->dev), devname); 3771 goto out; 3772 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3773 goto out; 3774 } else { 3775 mdclrerror(ep); 3776 } 3777 --argc, ++argv; 3778 3779 if (argc == 0) 3780 goto syntax; 3781 3782 /* grab -p */ 3783 if (strcmp(argv[0], "-p") != 0) 3784 goto syntax; 3785 --argc, ++argv; 3786 3787 if (argc == 0) 3788 goto syntax; 3789 3790 /* see if -e is there */ 3791 if (strcmp(argv[0], "-e") == 0) { 3792 /* use the whole disk */ 3793 options |= MDCMD_USE_WHOLE_DISK; 3794 --argc, ++argv; 3795 } 3796 3797 if (argc == 0) 3798 goto syntax; 3799 3800 /* get component name */ 3801 compname = Strdup(argv[0]); 3802 3803 if (options & MDCMD_USE_WHOLE_DISK) { 3804 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3805 goto out; 3806 } 3807 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3808 goto out; 3809 } 3810 } else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) { 3811 goto out; 3812 } 3813 assert(*spp != NULL); 3814 3815 if (!(options & MDCMD_NOLOCK)) { 3816 /* grab set lock */ 3817 if (meta_lock(*spp, TRUE, ep)) 3818 goto out; 3819 3820 if (meta_check_ownership(*spp, ep) != 0) 3821 goto out; 3822 } 3823 3824 /* allocate the soft partition */ 3825 msp = Zalloc(sizeof (*msp)); 3826 3827 /* setup common */ 3828 msp->common.namep = np; 3829 msp->common.type = MD_METASP; 3830 3831 compname = spcompnp->cname; 3832 3833 assert(spcompnp->rname != NULL); 3834 --argc, ++argv; 3835 3836 if (argc == 0) { 3837 goto syntax; 3838 } 3839 3840 if (*argv[0] == '-') { 3841 /* 3842 * parse any other command line options, this includes 3843 * the recovery options -o and -b. 
The special thing 3844 * with these options is that the len needs to be 3845 * kept track of otherwise when the geometry of the 3846 * "device" is built it will create an invalid geometry 3847 */ 3848 old_optind = optind = 0; 3849 opterr = 0; 3850 oflag = 0; 3851 seq = 0; 3852 failed = 0; 3853 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3854 sp_ext_offset_t offset; 3855 sp_ext_length_t length; 3856 longlong_t tmp_size; 3857 3858 switch (c) { 3859 case 'A': /* data alignment */ 3860 if (meta_sp_parsesizestring(optarg, 3861 &alignment) == -1) { 3862 failed = 1; 3863 } 3864 break; 3865 case 'o': /* offset in the partition */ 3866 if (oflag == 1) { 3867 failed = 1; 3868 } else { 3869 tmp_size = atoll(optarg); 3870 if (tmp_size <= 0) { 3871 failed = 1; 3872 } else { 3873 oflag = 1; 3874 options |= MDCMD_DIRECT; 3875 3876 offset = tmp_size; 3877 } 3878 } 3879 3880 break; 3881 case 'b': /* number of blocks */ 3882 if (oflag == 0) { 3883 failed = 1; 3884 } else { 3885 tmp_size = atoll(optarg); 3886 if (tmp_size <= 0) { 3887 failed = 1; 3888 } else { 3889 oflag = 0; 3890 3891 length = tmp_size; 3892 3893 /* we have a pair of values */ 3894 meta_sp_list_insert(*spp, np, 3895 &extlist, offset, length, 3896 EXTTYP_ALLOC, seq++, 3897 EXTFLG_UPDATE, 3898 meta_sp_cmp_by_offset); 3899 len += length; 3900 } 3901 } 3902 3903 break; 3904 default: 3905 argc -= old_optind; 3906 argv += old_optind; 3907 goto options; 3908 } 3909 3910 if (failed) { 3911 argc -= old_optind; 3912 argv += old_optind; 3913 goto syntax; 3914 } 3915 3916 old_optind = optind; 3917 } 3918 argc -= optind; 3919 argv += optind; 3920 3921 /* 3922 * Must have matching pairs of -o and -b flags 3923 */ 3924 if (oflag != 0) 3925 goto syntax; 3926 3927 /* 3928 * Can't specify both layout (indicated indirectly by 3929 * len being set by thye -o/-b cases above) AND 3930 * alignment 3931 */ 3932 if ((len > 0LL) && (alignment > 0LL)) 3933 goto syntax; 3934 3935 /* 3936 * sanity check the allocation list 3937 */ 3938 
if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3939 goto syntax; 3940 } 3941 3942 if (len == 0LL) { 3943 if (argc == 0) 3944 goto syntax; 3945 if (meta_sp_parsesize(argv[0], &len) == -1) 3946 goto syntax; 3947 --argc, ++argv; 3948 } 3949 3950 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3951 msp->ext.ext_val->len = len; 3952 msp->compnamep = spcompnp; 3953 3954 /* we should be at the end */ 3955 if (argc != 0) 3956 goto syntax; 3957 3958 /* create soft partition */ 3959 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3960 goto out; 3961 rval = 0; 3962 3963 /* let em know */ 3964 if (options & MDCMD_PRINT) { 3965 (void) printf(dgettext(TEXT_DOMAIN, 3966 "%s: Soft Partition is setup\n"), 3967 devname); 3968 (void) fflush(stdout); 3969 } 3970 goto out; 3971 3972 syntax: 3973 /* syntax error */ 3974 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3975 goto out; 3976 3977 options: 3978 /* options error */ 3979 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3980 goto out; 3981 3982 out: 3983 if (msp != NULL) { 3984 if (msp->ext.ext_val != NULL) { 3985 Free(msp->ext.ext_val); 3986 } 3987 Free(msp); 3988 } 3989 3990 return (rval); 3991 } 3992 3993 /* 3994 * FUNCTION: meta_free_sp() 3995 * INPUT: msp - the soft partition unit to free 3996 * OUTPUT: none 3997 * RETURNS: void 3998 * PURPOSE: provides an interface from the rest of libmeta for freeing a 3999 * soft partition unit 4000 */ 4001 void 4002 meta_free_sp(md_sp_t *msp) 4003 { 4004 Free(msp); 4005 } 4006 4007 /* 4008 * FUNCTION: meta_sp_issp() 4009 * INPUT: sp - the set name to check 4010 * np - the name to check 4011 * OUTPUT: ep - return error pointer 4012 * RETURNS: int - 0 means sp,np is a soft partition 4013 * 1 means sp,np is not a soft partition 4014 * PURPOSE: determines whether the given device is a soft partition 4015 * device. This is called by other metadevice check routines. 
4016 */ 4017 int 4018 meta_sp_issp( 4019 mdsetname_t *sp, 4020 mdname_t *np, 4021 md_error_t *ep 4022 ) 4023 { 4024 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 4025 return (1); 4026 4027 return (0); 4028 } 4029 4030 /* 4031 * FUNCTION: meta_check_sp() 4032 * INPUT: sp - the set name to check 4033 * msp - the unit structure to check 4034 * options - creation options 4035 * OUTPUT: repart_options - options to be passed to 4036 * meta_repartition_drive() 4037 * ep - return error pointer 4038 * RETURNS: int - 0 ok to create on this component 4039 * -1 error or not ok to create on this component 4040 * PURPOSE: Checks to determine whether the rules for creation of 4041 * soft partitions allow creation of a soft partition on 4042 * the device described by the mdname_t structure referred 4043 * to by msp->compnamep. 4044 * 4045 * NOTE: Does NOT check to determine whether the extents 4046 * described in the md_sp_t structure referred to by 4047 * msp will fit on the device described by the mdname_t 4048 * structure located at msp->compnamep. 4049 */ 4050 static int 4051 meta_check_sp( 4052 mdsetname_t *sp, 4053 md_sp_t *msp, 4054 mdcmdopts_t options, 4055 int *repart_options, 4056 md_error_t *ep 4057 ) 4058 { 4059 md_common_t *mdp; 4060 mdname_t *compnp = msp->compnamep; 4061 uint_t slice; 4062 mddrivename_t *dnp; 4063 mdname_t *slicenp; 4064 mdvtoc_t *vtocp; 4065 4066 /* make sure it is in the set */ 4067 if (meta_check_inset(sp, compnp, ep) != 0) 4068 return (-1); 4069 4070 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4071 uint_t rep_slice; 4072 4073 /* 4074 * check to make sure we can partition this drive. 4075 * we cannot continue if any of the following are 4076 * true: 4077 * The drive is a metadevice. 4078 * The drive contains a mounted slice. 4079 * The drive contains a slice being swapped to. 4080 * The drive contains slices which are part of other 4081 * metadevices. 4082 * The drive contains a metadb. 
4083 */ 4084 if (metaismeta(compnp)) 4085 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4086 compnp->cname)); 4087 4088 assert(compnp->drivenamep != NULL); 4089 4090 /* 4091 * ensure that we have slice 0 since the disk will be 4092 * repartitioned in the USE_WHOLE_DISK case. this check 4093 * is redundant unless the user incorrectly specifies a 4094 * a fully qualified drive AND slice name (i.e., 4095 * /dev/dsk/cXtXdXsX), which will be incorrectly 4096 * recognized as a drive name by the metaname code. 4097 */ 4098 4099 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4100 return (-1); 4101 if (slice != MD_SLICE0) 4102 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4103 4104 dnp = compnp->drivenamep; 4105 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4106 return (-1); 4107 4108 for (slice = 0; slice < vtocp->nparts; slice++) { 4109 4110 /* only check if the slice really exists */ 4111 if (vtocp->parts[slice].size == 0) 4112 continue; 4113 4114 slicenp = metaslicename(dnp, slice, ep); 4115 if (slicenp == NULL) 4116 return (-1); 4117 4118 /* check to ensure that it is not already in use */ 4119 if (meta_check_inuse(sp, 4120 slicenp, MDCHK_INUSE, ep) != 0) { 4121 return (-1); 4122 } 4123 4124 /* 4125 * Up to this point, tests are applied to all 4126 * slices uniformly. 4127 */ 4128 4129 if (slice == rep_slice) { 4130 /* 4131 * Tests inside the body of this 4132 * conditional are applied only to 4133 * slice seven. 4134 */ 4135 if (meta_check_inmeta(sp, slicenp, 4136 options | MDCHK_ALLOW_MDDB | 4137 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4138 return (-1); 4139 4140 /* 4141 * For slice seven, a metadb is NOT an 4142 * automatic failure. It merely means 4143 * that we're not allowed to muck 4144 * about with the partitioning of that 4145 * slice. We indicate this by masking 4146 * in the MD_REPART_LEAVE_REP flag. 
4147 */ 4148 if (metahasmddb(sp, slicenp, ep)) { 4149 assert(repart_options != 4150 NULL); 4151 *repart_options |= 4152 MD_REPART_LEAVE_REP; 4153 } 4154 4155 /* 4156 * Skip the remaining tests for slice 4157 * seven 4158 */ 4159 continue; 4160 } 4161 4162 /* 4163 * Tests below this point will be applied to 4164 * all slices EXCEPT for the replica slice. 4165 */ 4166 4167 4168 /* check if component is in a metadevice */ 4169 if (meta_check_inmeta(sp, slicenp, options, 0, 4170 -1, ep) != 0) 4171 return (-1); 4172 4173 /* check to see if component has a metadb */ 4174 if (metahasmddb(sp, slicenp, ep)) 4175 return (mddeverror(ep, MDE_HAS_MDDB, 4176 slicenp->dev, slicenp->cname)); 4177 } 4178 /* 4179 * This should be all of the testing necessary when 4180 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4181 * meta_check_sp() is oriented towards component 4182 * arguments instead of disks. 4183 */ 4184 goto meta_check_sp_ok; 4185 4186 } 4187 4188 /* check to ensure that it is not already in use */ 4189 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4190 return (-1); 4191 } 4192 4193 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4194 4195 /* 4196 * The component can have one or more soft partitions on it 4197 * already, but can't be part of any other type of metadevice, 4198 * so if it is used for a metadevice, but the metadevice 4199 * isn't a soft partition, return failure. 4200 */ 4201 4202 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4203 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4204 return (-1); 4205 } 4206 } else { /* handle metadevices */ 4207 /* get underlying unit & check capabilities */ 4208 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4209 return (-1); 4210 4211 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4212 (! 
(mdp->capabilities & MD_CAN_SP))) 4213 return (mdmderror(ep, MDE_INVAL_UNIT, 4214 meta_getminor(compnp->dev), compnp->cname)); 4215 } 4216 4217 meta_check_sp_ok: 4218 mdclrerror(ep); 4219 return (0); 4220 } 4221 4222 /* 4223 * FUNCTION: meta_create_sp() 4224 * INPUT: sp - the set name to create in 4225 * msp - the unit structure to create 4226 * oblist - an optional list of requested extents (-o/-b options) 4227 * options - creation options 4228 * alignment - data alignment 4229 * OUTPUT: ep - return error pointer 4230 * RETURNS: int - 0 success, -1 error 4231 * PURPOSE: does most of the work for creating a soft partition. If 4232 * metainit -p -e was used, first partition the drive. Then 4233 * create an extent list based on the existing soft partitions 4234 * and assume all space not used by them is free. Storage for 4235 * the new soft partition is allocated from the free extents 4236 * based on the length specified on the command line or the 4237 * oblist passed in. The unit structure is then committed and 4238 * the watermarks are updated. Finally, the status is changed to 4239 * Okay and the process is complete. 
4240 */ 4241 static int 4242 meta_create_sp( 4243 mdsetname_t *sp, 4244 md_sp_t *msp, 4245 sp_ext_node_t *oblist, 4246 mdcmdopts_t options, 4247 sp_ext_length_t alignment, 4248 md_error_t *ep 4249 ) 4250 { 4251 mdname_t *np = msp->common.namep; 4252 mdname_t *compnp = msp->compnamep; 4253 mp_unit_t *mp = NULL; 4254 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4255 md_set_params_t set_params; 4256 int rval = -1; 4257 diskaddr_t comp_size; 4258 diskaddr_t sp_start; 4259 sp_ext_node_t *extlist = NULL; 4260 int numexts = 0; /* number of extents */ 4261 int count = 0; 4262 int committed = 0; 4263 int repart_options = MD_REPART_FORCE; 4264 int create_flag = MD_CRO_32BIT; 4265 int mn_set_master = 0; 4266 4267 md_set_desc *sd; 4268 md_set_mmown_params_t *ownpar = NULL; 4269 int comp_is_mirror = 0; 4270 4271 /* validate soft partition */ 4272 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4273 return (-1); 4274 4275 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4276 if ((options & MDCMD_DOIT) != 0) { 4277 if (meta_repartition_drive(sp, 4278 compnp->drivenamep, 4279 repart_options, 4280 NULL, /* Don't return the VTOC */ 4281 ep) != 0) 4282 4283 return (-1); 4284 } else { 4285 /* 4286 * If -n and -e are both specified, it doesn't make 4287 * sense to continue without actually partitioning 4288 * the drive. 4289 */ 4290 return (0); 4291 } 4292 } 4293 4294 /* populate the start_blk field of the component name */ 4295 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4296 MD_DISKADDR_ERROR) { 4297 rval = -1; 4298 goto out; 4299 } 4300 4301 if (options & MDCMD_DOIT) { 4302 /* store name in namespace */ 4303 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4304 rval = -1; 4305 goto out; 4306 } 4307 } 4308 4309 /* 4310 * Get a list of the soft partitions that currently reside on 4311 * the component. 
We should ALWAYS force reload the cache, 4312 * because if this is a single creation, there will not BE a 4313 * cached list, and if we're using the md.tab, we must rebuild 4314 * the list because it won't contain the previous (if any) 4315 * soft partition. 4316 */ 4317 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4318 if (count < 0) { 4319 /* error occured */ 4320 rval = -1; 4321 goto out; 4322 } 4323 4324 /* 4325 * get the size of the underlying device. if the size is smaller 4326 * than or equal to the watermark size, we know there isn't 4327 * enough space. 4328 */ 4329 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4330 rval = -1; 4331 goto out; 4332 } else if (comp_size <= MD_SP_WMSIZE) { 4333 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4334 rval = -1; 4335 goto out; 4336 } 4337 /* 4338 * seed extlist with reserved space at the beginning of the volume and 4339 * enough space for the end watermark. The end watermark always gets 4340 * updated, but if the underlying device changes size it may not be 4341 * pointed to until the extent before it is updated. Since the 4342 * end of the reserved space is where the first watermark starts, 4343 * the reserved extent should never be marked for updating. 
4344 */ 4345 4346 meta_sp_list_insert(NULL, NULL, &extlist, 4347 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4348 meta_sp_list_insert(NULL, NULL, &extlist, 4349 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4350 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4351 4352 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4353 rval = -1; 4354 goto out; 4355 } 4356 4357 metafreenamelist(spnlp); 4358 4359 if (getenv(META_SP_DEBUG)) { 4360 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4361 meta_sp_list_dump(extlist); 4362 } 4363 4364 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4365 4366 /* get extent list from -o/-b options or from free space */ 4367 if (options & MDCMD_DIRECT) { 4368 if (getenv(META_SP_DEBUG)) { 4369 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4370 meta_sp_list_dump(oblist); 4371 } 4372 4373 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4374 if (numexts == -1) { 4375 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4376 rval = -1; 4377 goto out; 4378 } 4379 } else { 4380 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4381 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4382 meta_sp_get_default_alignment(sp, compnp, ep)); 4383 if (numexts == -1) { 4384 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4385 rval = -1; 4386 goto out; 4387 } 4388 } 4389 4390 assert(extlist != NULL); 4391 4392 /* create soft partition */ 4393 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4394 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4395 4396 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4397 4398 /* if we're not doing anything (metainit -n), return success */ 4399 if (! 
(options & MDCMD_DOIT)) { 4400 rval = 0; /* success */ 4401 goto out; 4402 } 4403 4404 (void) memset(&set_params, 0, sizeof (set_params)); 4405 4406 if (create_flag == MD_CRO_64BIT) { 4407 mp->c.un_revision |= MD_64BIT_META_DEV; 4408 set_params.options = MD_CRO_64BIT; 4409 } else { 4410 mp->c.un_revision &= ~MD_64BIT_META_DEV; 4411 set_params.options = MD_CRO_32BIT; 4412 } 4413 4414 if (getenv(META_SP_DEBUG)) { 4415 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4416 meta_sp_printunit(mp); 4417 } 4418 4419 /* 4420 * Check to see if we're trying to create a partition on a mirror. If so 4421 * we may have to enforce an ownership change before writing the 4422 * watermark out. 4423 */ 4424 if (metaismeta(compnp)) { 4425 char *miscname; 4426 4427 miscname = metagetmiscname(compnp, ep); 4428 if (miscname != NULL) 4429 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4430 else 4431 comp_is_mirror = 0; 4432 } else { 4433 comp_is_mirror = 0; 4434 } 4435 4436 /* 4437 * For a multi-node environment we have to ensure that the master 4438 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4439 * If the master does not own the device we will deadlock as the 4440 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4441 * ownership change that will block as the MD_IOCSET is still in 4442 * progress. To close this window we force an owner change to occur 4443 * before issuing the MD_IOCSET. We cannot simply open the device and 4444 * write to it as this will only work for the first soft-partition 4445 * creation. 
4446 */ 4447 4448 if (comp_is_mirror && !metaislocalset(sp)) { 4449 4450 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4451 rval = -1; 4452 goto out; 4453 } 4454 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4455 mn_set_master = 1; 4456 } 4457 } 4458 4459 set_params.mnum = MD_SID(mp); 4460 set_params.size = mp->c.un_size; 4461 set_params.mdp = (uintptr_t)mp; 4462 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4463 4464 /* first phase of commit. */ 4465 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4466 np->cname) != 0) { 4467 (void) mdstealerror(ep, &set_params.mde); 4468 rval = -1; 4469 goto out; 4470 } 4471 4472 /* we've successfully committed the record */ 4473 committed = 1; 4474 4475 /* write watermarks */ 4476 /* 4477 * Special-case for Multi-node sets. As we now have a distributed DRL 4478 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case 4479 * unless we use a 'special' MN-capable ioctl to stage the watermark 4480 * update. This only affects the master-node in an MN set. 
4481 */ 4482 if (mn_set_master) { 4483 if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) { 4484 rval = -1; 4485 goto out; 4486 } 4487 } else { 4488 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4489 rval = -1; 4490 goto out; 4491 } 4492 } 4493 4494 /* second phase of commit, set status to MD_SP_OK */ 4495 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4496 rval = -1; 4497 goto out; 4498 } 4499 rval = 0; 4500 out: 4501 Free(mp); 4502 if (ownpar) 4503 Free(ownpar); 4504 4505 if (extlist != NULL) 4506 meta_sp_list_free(&extlist); 4507 4508 if (rval != 0 && keynlp != NULL && committed != 1) 4509 (void) del_key_names(sp, keynlp, NULL); 4510 4511 metafreenamelist(keynlp); 4512 4513 return (rval); 4514 } 4515 4516 /* 4517 * ************************************************************************** 4518 * Reset (metaclear) Functions * 4519 * ************************************************************************** 4520 */ 4521 4522 /* 4523 * FUNCTION: meta_sp_reset_common() 4524 * INPUT: sp - the set name of the device to reset 4525 * np - the name of the device to reset 4526 * msp - the unit structure to reset 4527 * options - metaclear options 4528 * OUTPUT: ep - return error pointer 4529 * RETURNS: int - 0 success, -1 error 4530 * PURPOSE: "resets", or more accurately deletes, the soft partition 4531 * specified. First the state is set to "deleting" and then the 4532 * watermarks are all cleared out. Once the watermarks have been 4533 * updated, the unit structure is deleted from the metadb. 
4534 */ 4535 static int 4536 meta_sp_reset_common( 4537 mdsetname_t *sp, 4538 mdname_t *np, 4539 md_sp_t *msp, 4540 md_sp_reset_t reset_params, 4541 mdcmdopts_t options, 4542 md_error_t *ep 4543 ) 4544 { 4545 char *miscname; 4546 int rval = -1; 4547 int is_open = 0; 4548 4549 /* make sure that nobody owns us */ 4550 if (MD_HAS_PARENT(msp->common.parent)) 4551 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4552 np->cname)); 4553 4554 /* make sure that the soft partition isn't open */ 4555 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4556 return (-1); 4557 else if (is_open) 4558 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4559 np->cname)); 4560 4561 /* get miscname */ 4562 if ((miscname = metagetmiscname(np, ep)) == NULL) 4563 return (-1); 4564 4565 /* fill in reset params */ 4566 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4567 reset_params.mnum = meta_getminor(np->dev); 4568 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4569 4570 /* 4571 * clear soft partition - phase one. 4572 * place the soft partition into the "delete pending" state. 4573 */ 4574 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4575 return (-1); 4576 4577 /* 4578 * Now clear the watermarks. If the force flag is specified, 4579 * ignore any errors writing the watermarks and delete the unit 4580 * structure anyway. An error may leave the on-disk format in a 4581 * corrupt state. If force is not specified and we fail here, 4582 * the soft partition will remain in the "delete pending" state. 4583 */ 4584 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4585 ((options & MDCMD_FORCE) == 0)) 4586 goto out; 4587 4588 /* 4589 * clear soft partition - phase two. 4590 * the driver removes the soft partition from the metadb and 4591 * zeros out incore version. 
4592 */ 4593 if (metaioctl(MD_IOCRESET, &reset_params, 4594 &reset_params.mde, np->cname) != 0) { 4595 (void) mdstealerror(ep, &reset_params.mde); 4596 goto out; 4597 } 4598 4599 /* 4600 * Wait for the /dev to be cleaned up. Ignore the return 4601 * value since there's not much we can do. 4602 */ 4603 (void) meta_update_devtree(meta_getminor(np->dev)); 4604 4605 rval = 0; /* success */ 4606 4607 if (options & MDCMD_PRINT) { 4608 (void) printf(dgettext(TEXT_DOMAIN, 4609 "%s: Soft Partition is cleared\n"), 4610 np->cname); 4611 (void) fflush(stdout); 4612 } 4613 4614 /* 4615 * if told to recurse and on a metadevice, then attempt to 4616 * clear the subdevices. Indicate failure if the clear fails. 4617 */ 4618 if ((options & MDCMD_RECURSE) && 4619 (metaismeta(msp->compnamep)) && 4620 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4621 rval = -1; 4622 4623 out: 4624 meta_invalidate_name(np); 4625 return (rval); 4626 } 4627 4628 /* 4629 * FUNCTION: meta_sp_reset() 4630 * INPUT: sp - the set name of the device to reset 4631 * np - the name of the device to reset 4632 * options - metaclear options 4633 * OUTPUT: ep - return error pointer 4634 * RETURNS: int - 0 success, -1 error 4635 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4636 * soft partition. If np is NULL, then soft partitions are 4637 * all deleted at the current level and then recursively deleted. 4638 * Otherwise, if a name is specified either directly or as a 4639 * result of a recursive operation, it deletes only that name. 4640 * Since something sitting under a soft partition may be parented 4641 * to it, we have to reparent that other device to another soft 4642 * partition on the same component if we're deleting the one it's 4643 * parented to. 
4644 */ 4645 int 4646 meta_sp_reset( 4647 mdsetname_t *sp, 4648 mdname_t *np, 4649 mdcmdopts_t options, 4650 md_error_t *ep 4651 ) 4652 { 4653 md_sp_t *msp; 4654 int rval = -1; 4655 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4656 md_sp_reset_t reset_params; 4657 int num_sp; 4658 4659 assert(sp != NULL); 4660 4661 /* reset/delete all soft paritions */ 4662 if (np == NULL) { 4663 /* 4664 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4665 * is incorrect for soft partitions. We want to clear 4666 * all soft partitions at a particular level in the 4667 * metadevice stack before moving to the next level. 4668 * Thus, we clear MDCMD_RECURSE from the options. 4669 */ 4670 options &= ~MDCMD_RECURSE; 4671 4672 /* for each soft partition */ 4673 rval = 0; 4674 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4675 rval = -1; 4676 4677 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4678 np = nlp->namep; 4679 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4680 rval = -1; 4681 break; 4682 } 4683 /* 4684 * meta_reset_all calls us twice to get soft 4685 * partitions at the top and bottom of the stack. 4686 * thus, if we have a parent, we'll get deleted 4687 * on the next call. 4688 */ 4689 if (MD_HAS_PARENT(msp->common.parent)) 4690 continue; 4691 /* 4692 * If this is a multi-node set, we send a series 4693 * of individual metaclear commands. 
4694 */ 4695 if (meta_is_mn_set(sp, ep)) { 4696 if (meta_mn_send_metaclear_command(sp, 4697 np->cname, options, 0, ep) != 0) { 4698 rval = -1; 4699 break; 4700 } 4701 } else { 4702 if (meta_sp_reset(sp, np, options, ep) != 0) { 4703 rval = -1; 4704 break; 4705 } 4706 } 4707 } 4708 /* cleanup return status */ 4709 metafreenamelist(spnlp); 4710 return (rval); 4711 } 4712 4713 /* check the name */ 4714 if (metachkmeta(np, ep) != 0) 4715 return (-1); 4716 4717 /* get the unit structure */ 4718 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4719 return (-1); 4720 4721 /* clear out reset parameters */ 4722 (void) memset(&reset_params, 0, sizeof (reset_params)); 4723 4724 /* if our child is a metadevice, we need to deparent/reparent it */ 4725 if (metaismeta(msp->compnamep)) { 4726 /* get sp's on this component */ 4727 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4728 &spnlp, 1, ep)) <= 0) 4729 /* no sp's on this device. error! */ 4730 return (-1); 4731 else if (num_sp == 1) 4732 /* last sp on this device, so we deparent */ 4733 reset_params.new_parent = MD_NO_PARENT; 4734 else { 4735 /* have to reparent this metadevice */ 4736 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4737 if (meta_getminor(nlp->namep->dev) == 4738 meta_getminor(np->dev)) 4739 continue; 4740 /* 4741 * this isn't the softpart we are deleting, 4742 * so use this device as the new parent. 
4743 */ 4744 reset_params.new_parent = 4745 meta_getminor(nlp->namep->dev); 4746 break; 4747 } 4748 } 4749 metafreenamelist(spnlp); 4750 } 4751 4752 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4753 return (-1); 4754 4755 return (0); 4756 } 4757 4758 /* 4759 * FUNCTION: meta_sp_reset_component() 4760 * INPUT: sp - the set name of the device to reset 4761 * name - the string name of the device to reset 4762 * options - metaclear options 4763 * OUTPUT: ep - return error pointer 4764 * RETURNS: int - 0 success, -1 error 4765 * PURPOSE: provides the ability to delete all soft partitions on a 4766 * specified device (metaclear -p). It first gets all of the 4767 * soft partitions on the component and then deletes each one 4768 * individually. 4769 */ 4770 int 4771 meta_sp_reset_component( 4772 mdsetname_t *sp, 4773 char *name, 4774 mdcmdopts_t options, 4775 md_error_t *ep 4776 ) 4777 { 4778 mdname_t *compnp, *np; 4779 mdnamelist_t *spnlp = NULL; 4780 mdnamelist_t *nlp = NULL; 4781 md_sp_t *msp; 4782 int count; 4783 md_sp_reset_t reset_params; 4784 4785 if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL) 4786 return (-1); 4787 4788 /* If we're starting out with no soft partitions, it's an error */ 4789 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4790 if (count == 0) 4791 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4792 else if (count < 0) 4793 return (-1); 4794 4795 /* 4796 * clear all soft partitions on this component. 4797 * NOTE: we reparent underlying metadevices as we go so that 4798 * things stay sane. Also, if we encounter an error, we stop 4799 * and go no further in case recovery might be needed. 
4800 */ 4801 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4802 /* clear out reset parameters */ 4803 (void) memset(&reset_params, 0, sizeof (reset_params)); 4804 4805 /* check the name */ 4806 np = nlp->namep; 4807 4808 if (metachkmeta(np, ep) != 0) { 4809 metafreenamelist(spnlp); 4810 return (-1); 4811 } 4812 4813 /* get the unit structure */ 4814 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4815 metafreenamelist(spnlp); 4816 return (-1); 4817 } 4818 4819 /* have to deparent/reparent metadevices */ 4820 if (metaismeta(compnp)) { 4821 if (nlp->next == NULL) 4822 reset_params.new_parent = MD_NO_PARENT; 4823 else 4824 reset_params.new_parent = 4825 meta_getminor(spnlp->next->namep->dev); 4826 } 4827 4828 /* clear soft partition */ 4829 if (meta_sp_reset_common(sp, np, msp, reset_params, 4830 options, ep) < 0) { 4831 metafreenamelist(spnlp); 4832 return (-1); 4833 } 4834 } 4835 metafreenamelist(spnlp); 4836 return (0); 4837 } 4838 4839 /* 4840 * ************************************************************************** 4841 * Grow (metattach) Functions * 4842 * ************************************************************************** 4843 */ 4844 4845 /* 4846 * FUNCTION: meta_sp_attach() 4847 * INPUT: sp - the set name of the device to attach to 4848 * np - the name of the device to attach to 4849 * addsize - the unparsed string holding the amount of space to add 4850 * options - metattach options 4851 * alignment - data alignment 4852 * OUTPUT: ep - return error pointer 4853 * RETURNS: int - 0 success, -1 error 4854 * PURPOSE: grows a soft partition by reading in the existing unit 4855 * structure and setting its state to Growing, allocating more 4856 * space (similar to meta_create_sp()), updating the watermarks, 4857 * and then writing out the new unit structure in the Okay state. 
4858 */ 4859 int 4860 meta_sp_attach( 4861 mdsetname_t *sp, 4862 mdname_t *np, 4863 char *addsize, 4864 mdcmdopts_t options, 4865 sp_ext_length_t alignment, 4866 md_error_t *ep 4867 ) 4868 { 4869 md_grow_params_t grow_params; 4870 sp_ext_length_t grow_len; /* amount to grow */ 4871 mp_unit_t *mp, *new_un; 4872 mdname_t *compnp = NULL; 4873 4874 sp_ext_node_t *extlist = NULL; 4875 int numexts; 4876 mdnamelist_t *spnlp = NULL; 4877 int count; 4878 md_sp_t *msp; 4879 daddr_t start_block; 4880 4881 /* should have the same set */ 4882 assert(sp != NULL); 4883 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4884 4885 /* check name */ 4886 if (metachkmeta(np, ep) != 0) 4887 return (-1); 4888 4889 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4890 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4891 } 4892 4893 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4894 return (-1); 4895 4896 /* make sure we don't have a parent */ 4897 if (MD_HAS_PARENT(mp->c.un_parent)) { 4898 Free(mp); 4899 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4900 } 4901 4902 if (getenv(META_SP_DEBUG)) { 4903 meta_sp_debug("meta_sp_attach: Unit structure before new " 4904 "space:\n"); 4905 meta_sp_printunit(mp); 4906 } 4907 4908 /* 4909 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4910 * If this was not the case we would suffer the following 4911 * assertion failure: 4912 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4913 * file meta_check.x, line 315 4914 * I guess this is because we have not "seen" this drive before 4915 * and hence hit the failure - this is of course the attach routine 4916 */ 4917 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4918 Free(mp); 4919 return (-1); 4920 } 4921 4922 /* metakeyname does not fill in the key. 
*/ 4923 compnp->key = mp->un_key; 4924 4925 /* work out the space on the component that we are dealing with */ 4926 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4927 4928 /* 4929 * see if the component has been soft partitioned yet, or if an 4930 * error occurred. 4931 */ 4932 if (count == 0) { 4933 Free(mp); 4934 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4935 } else if (count < 0) { 4936 Free(mp); 4937 return (-1); 4938 } 4939 4940 /* 4941 * seed extlist with reserved space at the beginning of the volume and 4942 * enough space for the end watermark. The end watermark always gets 4943 * updated, but if the underlying device changes size it may not be 4944 * pointed to until the extent before it is updated. Since the 4945 * end of the reserved space is where the first watermark starts, 4946 * the reserved extent should never be marked for updating. 4947 */ 4948 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4949 MD_DISKADDR_ERROR) { 4950 Free(mp); 4951 return (-1); 4952 } 4953 4954 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4955 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4956 meta_sp_list_insert(NULL, NULL, &extlist, 4957 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4958 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4959 4960 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4961 Free(mp); 4962 return (-1); 4963 } 4964 4965 metafreenamelist(spnlp); 4966 4967 if (getenv(META_SP_DEBUG)) { 4968 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4969 meta_sp_list_dump(extlist); 4970 } 4971 4972 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4973 4974 assert(mp->un_numexts >= 1); 4975 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4976 mp->un_ext[mp->un_numexts - 1].un_poff, 4977 (alignment > 0) ? 
alignment : 4978 meta_sp_get_default_alignment(sp, compnp, ep)); 4979 4980 if (numexts == -1) { 4981 Free(mp); 4982 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4983 } 4984 4985 /* allocate new unit structure and copy in old unit */ 4986 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4987 grow_len, numexts, ep)) == NULL) { 4988 Free(mp); 4989 return (-1); 4990 } 4991 Free(mp); 4992 4993 /* If running in dryrun mode (-n option), we're done here */ 4994 if ((options & MDCMD_DOIT) == 0) { 4995 if (options & MDCMD_PRINT) { 4996 (void) printf(dgettext(TEXT_DOMAIN, 4997 "%s: Soft Partition would grow\n"), 4998 np->cname); 4999 (void) fflush(stdout); 5000 } 5001 return (0); 5002 } 5003 5004 if (getenv(META_SP_DEBUG)) { 5005 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 5006 meta_sp_printunit(new_un); 5007 } 5008 5009 assert(new_un != NULL); 5010 5011 (void) memset(&grow_params, 0, sizeof (grow_params)); 5012 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 5013 grow_params.options = MD_CRO_64BIT; 5014 new_un->c.un_revision |= MD_64BIT_META_DEV; 5015 } else { 5016 grow_params.options = MD_CRO_32BIT; 5017 new_un->c.un_revision &= ~MD_64BIT_META_DEV; 5018 } 5019 grow_params.mnum = MD_SID(new_un); 5020 grow_params.size = new_un->c.un_size; 5021 grow_params.mdp = (uintptr_t)new_un; 5022 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 5023 5024 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 5025 np->cname) != 0) { 5026 (void) mdstealerror(ep, &grow_params.mde); 5027 return (-1); 5028 } 5029 5030 /* update all watermarks */ 5031 5032 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 5033 return (-1); 5034 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 5035 return (-1); 5036 5037 5038 /* second phase of commit, set status to MD_SP_OK */ 5039 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 5040 return (-1); 5041 5042 meta_invalidate_name(np); 5043 5044 if (options & MDCMD_PRINT) { 5045 (void) 
printf(dgettext(TEXT_DOMAIN, 5046 "%s: Soft Partition has been grown\n"), 5047 np->cname); 5048 (void) fflush(stdout); 5049 } 5050 5051 return (0); 5052 } 5053 5054 /* 5055 * ************************************************************************** 5056 * Recovery (metarecover) Functions * 5057 * ************************************************************************** 5058 */ 5059 5060 /* 5061 * FUNCTION: meta_recover_sp() 5062 * INPUT: sp - the name of the set we are recovering on 5063 * compnp - name pointer for device we are recovering on 5064 * argc - argument count 5065 * argv - left over arguments not parsed by metarecover command 5066 * options - metarecover options 5067 * OUTPUT: ep - return error pointer 5068 * RETURNS: int - 0 - success, -1 - error 5069 * PURPOSE: parse soft partitioning-specific metarecover options and 5070 * dispatch to the appropriate function to handle recovery. 5071 */ 5072 int 5073 meta_recover_sp( 5074 mdsetname_t *sp, 5075 mdname_t *compnp, 5076 int argc, 5077 char *argv[], 5078 mdcmdopts_t options, 5079 md_error_t *ep 5080 ) 5081 { 5082 md_set_desc *sd; 5083 5084 if (argc > 1) { 5085 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5086 argc, argv); 5087 return (-1); 5088 } 5089 5090 /* 5091 * For a MN set, this operation must be performed on the master 5092 * as it is responsible for maintaining the watermarks 5093 */ 5094 if (!metaislocalset(sp)) { 5095 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5096 return (-1); 5097 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5098 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5099 sd->sd_mn_master_nodenm, NULL, NULL); 5100 return (-1); 5101 } 5102 } 5103 if (argc == 0) { 5104 /* 5105 * if no additional arguments are passed, metarecover should 5106 * validate both on-disk and metadb structures as well as 5107 * checking that both are consistent with each other 5108 */ 5109 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5110 return (-1); 5111 if 
(meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5112 return (-1); 5113 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5114 return (-1); 5115 } else if (strcmp(argv[0], "-d") == 0) { 5116 /* 5117 * Ensure that there is no existing valid record for this 5118 * soft-partition. If there is we have nothing to do. 5119 */ 5120 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5121 return (-1); 5122 /* validate and recover from on-disk structures */ 5123 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5124 return (-1); 5125 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5126 return (-1); 5127 } else if (strcmp(argv[0], "-m") == 0) { 5128 /* validate and recover from metadb structures */ 5129 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5130 return (-1); 5131 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5132 return (-1); 5133 } else { 5134 /* syntax error */ 5135 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5136 argc, argv); 5137 return (-1); 5138 } 5139 5140 return (0); 5141 } 5142 5143 /* 5144 * FUNCTION: meta_sp_display_exthdr() 5145 * INPUT: none 5146 * OUTPUT: none 5147 * RETURNS: void 5148 * PURPOSE: print header line for sp_ext_node_t information. to be used 5149 * in conjunction with meta_sp_display_ext(). 5150 */ 5151 static void 5152 meta_sp_display_exthdr(void) 5153 { 5154 (void) printf("%20s %5s %7s %20s %20s\n", 5155 dgettext(TEXT_DOMAIN, "Name"), 5156 dgettext(TEXT_DOMAIN, "Seq#"), 5157 dgettext(TEXT_DOMAIN, "Type"), 5158 dgettext(TEXT_DOMAIN, "Offset"), 5159 dgettext(TEXT_DOMAIN, "Length")); 5160 } 5161 5162 5163 /* 5164 * FUNCTION: meta_sp_display_ext() 5165 * INPUT: ext - extent to display 5166 * OUTPUT: none 5167 * RETURNS: void 5168 * PURPOSE: print selected fields from sp_ext_node_t. 
5169 */ 5170 static void 5171 meta_sp_display_ext(sp_ext_node_t *ext) 5172 { 5173 /* print extent information */ 5174 if (ext->ext_namep != NULL) 5175 (void) printf("%20s ", ext->ext_namep->cname); 5176 else 5177 (void) printf("%20s ", "NONE"); 5178 5179 (void) printf("%5u ", ext->ext_seq); 5180 5181 switch (ext->ext_type) { 5182 case EXTTYP_ALLOC: 5183 (void) printf("%7s ", "ALLOC"); 5184 break; 5185 case EXTTYP_FREE: 5186 (void) printf("%7s ", "FREE"); 5187 break; 5188 case EXTTYP_RESERVED: 5189 (void) printf("%7s ", "RESV"); 5190 break; 5191 case EXTTYP_END: 5192 (void) printf("%7s ", "END"); 5193 break; 5194 default: 5195 (void) printf("%7s ", "INVLD"); 5196 break; 5197 } 5198 5199 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5200 } 5201 5202 5203 /* 5204 * FUNCTION: meta_sp_checkseq() 5205 * INPUT: extlist - list of extents to be checked 5206 * OUTPUT: none 5207 * RETURNS: int - 0 - success, -1 - error 5208 * PURPOSE: check soft partition sequence numbers. this function assumes 5209 * that a list of extents representing 1 or more soft partitions 5210 * is passed in sorted in sequence number order. within a 5211 * single soft partition, there may not be any missing or 5212 * duplicate sequence numbers. 
5213 */ 5214 static int 5215 meta_sp_checkseq(sp_ext_node_t *extlist) 5216 { 5217 sp_ext_node_t *ext; 5218 5219 assert(extlist != NULL); 5220 5221 for (ext = extlist; 5222 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5223 ext = ext->ext_next) { 5224 if (ext->ext_next->ext_namep != NULL && 5225 strcmp(ext->ext_next->ext_namep->cname, 5226 ext->ext_namep->cname) != 0) 5227 continue; 5228 5229 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5230 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5231 "%s: sequence numbers are " 5232 "incorrect: %d should be %d\n"), 5233 ext->ext_next->ext_namep->cname, 5234 ext->ext_next->ext_seq, ext->ext_seq + 1); 5235 return (-1); 5236 } 5237 } 5238 return (0); 5239 } 5240 5241 5242 /* 5243 * FUNCTION: meta_sp_resolve_name_conflict() 5244 * INPUT: sp - name of set we're are recovering in. 5245 * old_np - name pointer of soft partition we found on disk. 5246 * OUTPUT: new_np - name pointer for new soft partition name. 5247 * ep - error pointer returned. 5248 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5249 * PURPOSE: Check to see if the name of one of the soft partitions we found 5250 * on disk already exists in the metadb. If so, prompt for a new 5251 * name. In addition, we keep a static array of names that 5252 * will be recovered from this device since these names don't 5253 * exist in the configuration at this point but cannot be 5254 * recovered more than once. 
5255 */ 5256 static int 5257 meta_sp_resolve_name_conflict( 5258 mdsetname_t *sp, 5259 mdname_t *old_np, 5260 mdname_t **new_np, 5261 md_error_t *ep 5262 ) 5263 { 5264 char yesno[255]; 5265 char *yes; 5266 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5267 int nunits; 5268 static int *used_names = NULL; 5269 5270 assert(old_np != NULL); 5271 5272 if (used_names == NULL) { 5273 if ((nunits = meta_get_nunits(ep)) < 0) 5274 return (-1); 5275 used_names = Zalloc(nunits * sizeof (int)); 5276 } 5277 5278 /* see if it exists already */ 5279 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5280 metagetmiscname(old_np, ep) == NULL) { 5281 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5282 return (-1); 5283 else { 5284 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5285 mdclrerror(ep); 5286 return (0); 5287 } 5288 } 5289 5290 /* name exists, ask the user for a new one */ 5291 (void) printf(dgettext(TEXT_DOMAIN, 5292 "WARNING: A soft partition named %s was found in the extent\n" 5293 "headers, but this name already exists in the metadb " 5294 "configuration.\n" 5295 "In order to continue recovery you must supply\n" 5296 "a new name for this soft partition.\n"), old_np->cname); 5297 (void) printf(dgettext(TEXT_DOMAIN, 5298 "Would you like to continue and supply a new name? 
(yes/no) ")); 5299 5300 (void) fflush(stdout); 5301 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5302 (strlen(yesno) == 1)) 5303 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5304 dgettext(TEXT_DOMAIN, "no")); 5305 yes = dgettext(TEXT_DOMAIN, "yes"); 5306 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5307 return (-1); 5308 } 5309 5310 (void) fflush(stdin); 5311 5312 /* get the new name */ 5313 for (;;) { 5314 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5315 "for this soft partition (dXXXX) ")); 5316 (void) fflush(stdout); 5317 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5318 (void) strcpy(newname, ""); 5319 5320 /* remove newline character */ 5321 if (newname[strlen(newname) - 1] == '\n') 5322 newname[strlen(newname) - 1] = '\0'; 5323 5324 if (!(is_metaname(newname)) || 5325 (meta_init_make_device(&sp, newname, ep) <= 0)) { 5326 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5327 "Invalid metadevice name\n")); 5328 (void) fflush(stderr); 5329 continue; 5330 } 5331 5332 if ((*new_np = metaname(&sp, newname, 5333 META_DEVICE, ep)) == NULL) { 5334 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5335 "Invalid metadevice name\n")); 5336 (void) fflush(stderr); 5337 continue; 5338 } 5339 5340 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5341 /* make sure the name isn't already being used */ 5342 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5343 metagetmiscname(*new_np, ep) != NULL) { 5344 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5345 "That name already exists\n")); 5346 continue; 5347 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5348 return (-1); 5349 5350 break; 5351 } 5352 5353 /* got a new name, place in used array and return */ 5354 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5355 mdclrerror(ep); 5356 return (1); 5357 } 5358 5359 /* 5360 * FUNCTION: meta_sp_validate_wm() 5361 * INPUT: sp - set name we are recovering in 5362 * compnp - name pointer for device we are recovering from 5363 * options - metarecover options 5364 * OUTPUT: ep - error pointer returned 5365 * RETURNS: int - 0 - success, -1 - error 5366 * PURPOSE: validate and display watermark configuration. walk the 5367 * on-disk watermark structures and validate the information 5368 * found within. since a watermark configuration is 5369 * "self-defining", the act of traversing the watermarks 5370 * is part of the validation process. 5371 */ 5372 static int 5373 meta_sp_validate_wm( 5374 mdsetname_t *sp, 5375 mdname_t *compnp, 5376 mdcmdopts_t options, 5377 md_error_t *ep 5378 ) 5379 { 5380 sp_ext_node_t *extlist = NULL; 5381 sp_ext_node_t *ext; 5382 int num_sps = 0; 5383 int rval; 5384 5385 if ((options & MDCMD_VERBOSE) != 0) 5386 (void) printf(dgettext(TEXT_DOMAIN, 5387 "Verifying on-disk structures on %s.\n"), 5388 compnp->cname); 5389 5390 /* 5391 * for each watermark, build an ext_node, place on list. 
5392 */ 5393 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5394 meta_sp_cmp_by_nameseq, ep); 5395 5396 if ((options & MDCMD_VERBOSE) != 0) { 5397 /* print out what we found */ 5398 if (extlist == NULL) 5399 (void) printf(dgettext(TEXT_DOMAIN, 5400 "No extent headers found on %s.\n"), 5401 compnp->cname); 5402 else { 5403 (void) printf(dgettext(TEXT_DOMAIN, 5404 "The following extent headers were found on %s.\n"), 5405 compnp->cname); 5406 meta_sp_display_exthdr(); 5407 } 5408 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5409 meta_sp_display_ext(ext); 5410 } 5411 5412 if (rval < 0) { 5413 (void) printf(dgettext(TEXT_DOMAIN, 5414 "%s: On-disk structures invalid or " 5415 "no soft partitions found.\n"), 5416 compnp->cname); 5417 return (-1); 5418 } 5419 5420 assert(extlist != NULL); 5421 5422 /* count number of soft partitions */ 5423 for (ext = extlist; 5424 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5425 ext = ext->ext_next) { 5426 if (ext->ext_next != NULL && 5427 ext->ext_next->ext_namep != NULL && 5428 strcmp(ext->ext_next->ext_namep->cname, 5429 ext->ext_namep->cname) == 0) 5430 continue; 5431 num_sps++; 5432 } 5433 5434 if ((options & MDCMD_VERBOSE) != 0) 5435 (void) printf(dgettext(TEXT_DOMAIN, 5436 "Found %d soft partition(s) on %s.\n"), num_sps, 5437 compnp->cname); 5438 5439 if (num_sps == 0) { 5440 (void) printf(dgettext(TEXT_DOMAIN, 5441 "%s: No soft partitions.\n"), compnp->cname); 5442 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5443 } 5444 5445 /* check sequence numbers */ 5446 if ((options & MDCMD_VERBOSE) != 0) 5447 (void) printf(dgettext(TEXT_DOMAIN, 5448 "Checking sequence numbers.\n")); 5449 5450 if (meta_sp_checkseq(extlist) != 0) 5451 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5452 5453 return (0); 5454 } 5455 5456 /* 5457 * FUNCTION: meta_sp_validate_unit() 5458 * INPUT: sp - name of set we are recovering in 5459 * compnp - name of component we are recovering from 5460 * options - 
metarecover options 5461 * OUTPUT: ep - error pointer returned 5462 * RETURNS: int - 0 - success, -1 - error 5463 * PURPOSE: validate and display metadb configuration. begin by getting 5464 * all soft partitions built on the specified component. get 5465 * the unit structure for each one and validate the fields within. 5466 */ 5467 static int 5468 meta_sp_validate_unit( 5469 mdsetname_t *sp, 5470 mdname_t *compnp, 5471 mdcmdopts_t options, 5472 md_error_t *ep 5473 ) 5474 { 5475 md_sp_t *msp; 5476 mdnamelist_t *spnlp = NULL; 5477 mdnamelist_t *namep = NULL; 5478 int count; 5479 uint_t extn; 5480 sp_ext_length_t size; 5481 5482 if ((options & MDCMD_VERBOSE) != 0) 5483 (void) printf(dgettext(TEXT_DOMAIN, 5484 "%s: Validating soft partition metadb entries.\n"), 5485 compnp->cname); 5486 5487 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5488 return (-1); 5489 5490 /* get all soft partitions on component */ 5491 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5492 5493 if (count == 0) { 5494 (void) printf(dgettext(TEXT_DOMAIN, 5495 "%s: No soft partitions.\n"), compnp->cname); 5496 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5497 } else if (count < 0) { 5498 return (-1); 5499 } 5500 5501 /* Now go through the soft partitions and check each one */ 5502 for (namep = spnlp; namep != NULL; namep = namep->next) { 5503 mdname_t *curnp = namep->namep; 5504 sp_ext_offset_t curvoff; 5505 5506 /* get the unit structure */ 5507 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5508 return (-1); 5509 5510 /* verify generic unit structure parameters */ 5511 if ((options & MDCMD_VERBOSE) != 0) 5512 (void) printf(dgettext(TEXT_DOMAIN, 5513 "\nVerifying device %s.\n"), 5514 curnp->cname); 5515 5516 /* 5517 * MD_SP_LAST is an invalid state and is always the 5518 * highest numbered. 
5519 */ 5520 if (msp->status >= MD_SP_LAST) { 5521 (void) printf(dgettext(TEXT_DOMAIN, 5522 "%s: status value %u is out of range.\n"), 5523 curnp->cname, msp->status); 5524 return (mdmderror(ep, MDE_RECOVER_FAILED, 5525 0, curnp->cname)); 5526 } else if ((options & MDCMD_VERBOSE) != 0) { 5527 uint_t tstate = 0; 5528 5529 if (metaismeta(msp->compnamep)) { 5530 if (meta_get_tstate(msp->common.namep->dev, 5531 &tstate, ep) != 0) 5532 return (-1); 5533 } 5534 (void) printf(dgettext(TEXT_DOMAIN, 5535 "%s: Status \"%s\" is valid.\n"), 5536 curnp->cname, meta_sp_status_to_name(msp->status, 5537 tstate & MD_DEV_ERRORED)); 5538 } 5539 5540 /* Now verify each extent */ 5541 if ((options & MDCMD_VERBOSE) != 0) 5542 (void) printf("%14s %21s %21s %21s\n", 5543 dgettext(TEXT_DOMAIN, "Extent Number"), 5544 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5545 dgettext(TEXT_DOMAIN, "Physical Offset"), 5546 dgettext(TEXT_DOMAIN, "Length")); 5547 5548 curvoff = 0ULL; 5549 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5550 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5551 5552 if ((options & MDCMD_VERBOSE) != 0) 5553 (void) printf("%14u %21llu %21llu %21llu\n", 5554 extn, extp->voff, extp->poff, extp->len); 5555 5556 if (extp->voff != curvoff) { 5557 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5558 "%s: virtual offset for extent %u " 5559 "is inconsistent, expected %llu, " 5560 "got %llu.\n"), curnp->cname, extn, 5561 curvoff, extp->voff); 5562 return (mdmderror(ep, MDE_RECOVER_FAILED, 5563 0, compnp->cname)); 5564 } 5565 5566 /* make sure extent does not drop off the end */ 5567 if ((extp->poff + extp->len) == size) { 5568 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5569 "%s: extent %u at offset %llu, " 5570 "length %llu exceeds the size of the " 5571 "device, %llu.\n"), curnp->cname, 5572 extn, extp->poff, extp->len, size); 5573 return (mdmderror(ep, MDE_RECOVER_FAILED, 5574 0, compnp->cname)); 5575 } 5576 5577 curvoff += extp->len; 5578 } 5579 } 5580 if (options & MDCMD_PRINT) { 
5581 (void) printf(dgettext(TEXT_DOMAIN, 5582 "%s: Soft Partition metadb configuration is valid\n"), 5583 compnp->cname); 5584 } 5585 return (0); 5586 } 5587 5588 /* 5589 * FUNCTION: meta_sp_validate_wm_and_unit() 5590 * INPUT: sp - name of set we are recovering in 5591 * compnp - name of device we are recovering from 5592 * options - metarecover options 5593 * OUTPUT: ep - error pointer returned 5594 * RETURNS: int - 0 - success, -1 error 5595 * PURPOSE: cross-validate and display watermarks and metadb records. 5596 * get both the unit structures for the soft partitions built 5597 * on the specified component and the watermarks found on that 5598 * component and check to make sure they are consistent with 5599 * each other. 5600 */ 5601 static int 5602 meta_sp_validate_wm_and_unit( 5603 mdsetname_t *sp, 5604 mdname_t *np, 5605 mdcmdopts_t options, 5606 md_error_t *ep 5607 ) 5608 { 5609 sp_ext_node_t *wmlist = NULL; 5610 sp_ext_node_t *unitlist = NULL; 5611 sp_ext_node_t *unitext; 5612 sp_ext_node_t *wmext; 5613 sp_ext_offset_t tmpunitoff; 5614 mdnamelist_t *spnlp = NULL; 5615 int count; 5616 int rval = 0; 5617 int verbose = (options & MDCMD_VERBOSE); 5618 5619 /* get unit structure list */ 5620 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5621 if (count <= 0) 5622 return (-1); 5623 5624 meta_sp_list_insert(NULL, NULL, &unitlist, 5625 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5626 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5627 5628 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5629 metafreenamelist(spnlp); 5630 return (-1); 5631 } 5632 5633 metafreenamelist(spnlp); 5634 5635 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5636 5637 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5638 meta_sp_cmp_by_offset, ep) < 0) { 5639 meta_sp_list_free(&unitlist); 5640 return (-1); 5641 } 5642 5643 if (getenv(META_SP_DEBUG)) { 5644 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5645 
meta_sp_list_dump(unitlist); 5646 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5647 meta_sp_list_dump(wmlist); 5648 } 5649 5650 /* 5651 * step through both lists and compare allocated nodes. Free 5652 * nodes and end watermarks may differ between the two but 5653 * that's generally ok, and if they're wrong will typically 5654 * cause misplaced allocated extents. 5655 */ 5656 if (verbose) 5657 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5658 "allocations match extent headers.\n"), np->cname); 5659 5660 unitext = unitlist; 5661 wmext = wmlist; 5662 while ((wmext != NULL) && (unitext != NULL)) { 5663 /* find next allocated extents in each list */ 5664 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5665 wmext = wmext->ext_next; 5666 5667 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5668 unitext = unitext->ext_next; 5669 5670 if (wmext == NULL || unitext == NULL) 5671 break; 5672 5673 if (verbose) { 5674 (void) printf(dgettext(TEXT_DOMAIN, 5675 "Metadb extent:\n")); 5676 meta_sp_display_exthdr(); 5677 meta_sp_display_ext(unitext); 5678 (void) printf(dgettext(TEXT_DOMAIN, 5679 "Extent header extent:\n")); 5680 meta_sp_display_exthdr(); 5681 meta_sp_display_ext(wmext); 5682 (void) printf("\n"); 5683 } 5684 5685 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5686 rval = -1; 5687 5688 /* 5689 * if the offsets aren't equal, only increment the 5690 * lowest one in hopes of getting the lists back in sync. 5691 */ 5692 tmpunitoff = unitext->ext_offset; 5693 if (unitext->ext_offset <= wmext->ext_offset) 5694 unitext = unitext->ext_next; 5695 if (wmext->ext_offset <= tmpunitoff) 5696 wmext = wmext->ext_next; 5697 } 5698 5699 /* 5700 * if both lists aren't at the end then there are extra 5701 * allocated nodes in one of them. 
5702 */ 5703 if (wmext != NULL) { 5704 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5705 "%s: extent headers contain allocations not in " 5706 "the metadb\n\n"), np->cname); 5707 rval = -1; 5708 } 5709 5710 if (unitext != NULL) { 5711 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5712 "%s: metadb contains allocations not in the extent " 5713 "headers\n\n"), np->cname); 5714 rval = -1; 5715 } 5716 5717 if (options & MDCMD_PRINT) { 5718 if (rval == 0) { 5719 (void) printf(dgettext(TEXT_DOMAIN, 5720 "%s: Soft Partition metadb matches extent " 5721 "header configuration\n"), np->cname); 5722 } else { 5723 (void) printf(dgettext(TEXT_DOMAIN, 5724 "%s: Soft Partition metadb does not match extent " 5725 "header configuration\n"), np->cname); 5726 } 5727 } 5728 5729 return (rval); 5730 } 5731 5732 /* 5733 * FUNCTION: meta_sp_validate_exts() 5734 * INPUT: compnp - name pointer for device we are recovering from 5735 * wmext - extent node representing watermark 5736 * unitext - extent node from unit structure 5737 * OUTPUT: ep - return error pointer 5738 * RETURNS: int - 0 - succes, mdmderror return code - error 5739 * PURPOSE: Takes two extent nodes and checks them against each other. 5740 * offset, length, sequence number, set, and name are compared. 
5741 */ 5742 static int 5743 meta_sp_validate_exts( 5744 mdname_t *compnp, 5745 sp_ext_node_t *wmext, 5746 sp_ext_node_t *unitext, 5747 md_error_t *ep 5748 ) 5749 { 5750 if (wmext->ext_offset != unitext->ext_offset) { 5751 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5752 "%s: unit structure and extent header offsets differ.\n"), 5753 compnp->cname); 5754 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5755 } 5756 5757 if (wmext->ext_length != unitext->ext_length) { 5758 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5759 "%s: unit structure and extent header lengths differ.\n"), 5760 compnp->cname); 5761 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5762 } 5763 5764 if (wmext->ext_seq != unitext->ext_seq) { 5765 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5766 "%s: unit structure and extent header sequence numbers " 5767 "differ.\n"), compnp->cname); 5768 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5769 } 5770 5771 if (wmext->ext_type != unitext->ext_type) { 5772 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5773 "%s: unit structure and extent header types differ.\n"), 5774 compnp->cname); 5775 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5776 } 5777 5778 /* 5779 * If one has a set pointer and the other doesn't, error. 5780 * If both extents have setnames, then make sure they match 5781 * If both are NULL, it's ok, they match. 
5782 */ 5783 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5784 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5785 "%s: unit structure and extent header set values " 5786 "differ.\n"), compnp->cname); 5787 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5788 } 5789 5790 if (unitext->ext_setp != NULL) { 5791 if (strcmp(unitext->ext_setp->setname, 5792 wmext->ext_setp->setname) != 0) { 5793 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5794 "%s: unit structure and extent header set names " 5795 "differ.\n"), compnp->cname); 5796 return (mdmderror(ep, MDE_RECOVER_FAILED, 5797 0, compnp->cname)); 5798 } 5799 } 5800 5801 /* 5802 * If one has a name pointer and the other doesn't, error. 5803 * If both extents have names, then make sure they match 5804 * If both are NULL, it's ok, they match. 5805 */ 5806 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5807 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5808 "%s: unit structure and extent header name values " 5809 "differ.\n"), compnp->cname); 5810 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5811 } 5812 5813 if (unitext->ext_namep != NULL) { 5814 if (strcmp(wmext->ext_namep->cname, 5815 unitext->ext_namep->cname) != 0) { 5816 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5817 "%s: unit structure and extent header names " 5818 "differ.\n"), compnp->cname); 5819 return (mdmderror(ep, MDE_RECOVER_FAILED, 5820 0, compnp->cname)); 5821 } 5822 } 5823 5824 return (0); 5825 } 5826 5827 /* 5828 * FUNCTION: update_sp_status() 5829 * INPUT: sp - name of set we are recovering in 5830 * minors - pointer to an array of soft partition minor numbers 5831 * num_sps - number of minor numbers in array 5832 * status - new status to be applied to all soft parts in array 5833 * mn_set - set if current set is a multi-node set 5834 * OUTPUT: ep - return error pointer 5835 * RETURNS: int - 0 - success, -1 - error 5836 * PURPOSE: update status of soft partitions to new status. 
minors is an 5837 * array of minor numbers to apply the new status to. 5838 * If mn_set is set, a message is sent to all nodes in the 5839 * cluster to update the status locally. 5840 */ 5841 static int 5842 update_sp_status( 5843 mdsetname_t *sp, 5844 minor_t *minors, 5845 int num_sps, 5846 sp_status_t status, 5847 bool_t mn_set, 5848 md_error_t *ep 5849 ) 5850 { 5851 int i; 5852 int err = 0; 5853 5854 if (mn_set) { 5855 md_mn_msg_sp_setstat_t sp_setstat_params; 5856 int result; 5857 md_mn_result_t *resp = NULL; 5858 5859 for (i = 0; i < num_sps; i++) { 5860 sp_setstat_params.sp_setstat_mnum = minors[i]; 5861 sp_setstat_params.sp_setstat_status = status; 5862 5863 result = mdmn_send_message(sp->setno, 5864 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0, 5865 (char *)&sp_setstat_params, 5866 sizeof (sp_setstat_params), 5867 &resp, ep); 5868 if (resp != NULL) { 5869 if (resp->mmr_exitval != 0) 5870 err = -1; 5871 free_result(resp); 5872 } 5873 if (result != 0) { 5874 err = -1; 5875 } 5876 } 5877 } else { 5878 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5879 err = -1; 5880 } 5881 if (err < 0) { 5882 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5883 "Error updating status on recovered soft " 5884 "partitions.\n")); 5885 } 5886 return (err); 5887 } 5888 5889 /* 5890 * FUNCTION: meta_sp_recover_from_wm() 5891 * INPUT: sp - name of set we are recovering in 5892 * compnp - name pointer for component we are recovering from 5893 * options - metarecover options 5894 * OUTPUT: ep - return error pointer 5895 * RETURNS: int - 0 - success, -1 - error 5896 * PURPOSE: update metadb records to match watermarks. begin by getting 5897 * an extlist representing all soft partitions on the component. 5898 * then build a unit structure for each soft partition. 5899 * notify user of changes, then commit each soft partition to 5900 * the metadb one at a time in the "recovering" state. 
update 5901 * any watermarks that may need it (to reflect possible name 5902 * changes), and, finally, set the status of all recovered 5903 * partitions to the "OK" state at once. 5904 */ 5905 static int 5906 meta_sp_recover_from_wm( 5907 mdsetname_t *sp, 5908 mdname_t *compnp, 5909 mdcmdopts_t options, 5910 md_error_t *ep 5911 ) 5912 { 5913 sp_ext_node_t *extlist = NULL; 5914 sp_ext_node_t *sp_list = NULL; 5915 sp_ext_node_t *update_list = NULL; 5916 sp_ext_node_t *ext; 5917 sp_ext_node_t *sp_ext; 5918 mp_unit_t *mp; 5919 mp_unit_t **un_array; 5920 int numexts = 0, num_sps = 0, i = 0; 5921 int err = 0; 5922 int not_recovered = 0; 5923 int committed = 0; 5924 sp_ext_length_t sp_length = 0LL; 5925 mdnamelist_t *keynlp = NULL; 5926 mdname_t *np; 5927 mdname_t *new_np; 5928 int new_name; 5929 md_set_params_t set_params; 5930 minor_t *minors = NULL; 5931 char yesno[255]; 5932 char *yes; 5933 bool_t mn_set = 0; 5934 md_set_desc *sd; 5935 mm_unit_t *mm; 5936 md_set_mmown_params_t *ownpar = NULL; 5937 int comp_is_mirror = 0; 5938 5939 /* 5940 * if this component appears in another metadevice already, do 5941 * NOT recover from it. 5942 */ 5943 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5944 return (-1); 5945 5946 /* set flag if dealing with a MN set */ 5947 if (!metaislocalset(sp)) { 5948 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5949 return (-1); 5950 } 5951 if (MD_MNSET_DESC(sd)) 5952 mn_set = 1; 5953 } 5954 /* 5955 * for each watermark, build an ext_node, place on list. 
5956 */ 5957 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5958 meta_sp_cmp_by_nameseq, ep) < 0) 5959 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5960 5961 assert(extlist != NULL); 5962 5963 /* count number of soft partitions */ 5964 for (ext = extlist; 5965 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5966 ext = ext->ext_next) { 5967 if (ext->ext_next != NULL && 5968 ext->ext_next->ext_namep != NULL && 5969 strcmp(ext->ext_next->ext_namep->cname, 5970 ext->ext_namep->cname) == 0) 5971 continue; 5972 num_sps++; 5973 } 5974 5975 /* allocate array of unit structure pointers */ 5976 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5977 5978 /* 5979 * build unit structures from list of ext_nodes. 5980 */ 5981 for (ext = extlist; 5982 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5983 ext = ext->ext_next) { 5984 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5985 &sp_list, ext->ext_offset, ext->ext_length, 5986 ext->ext_type, ext->ext_seq, ext->ext_flags, 5987 meta_sp_cmp_by_nameseq); 5988 5989 numexts++; 5990 sp_length += ext->ext_length - MD_SP_WMSIZE; 5991 5992 if (ext->ext_next != NULL && 5993 ext->ext_next->ext_namep != NULL && 5994 strcmp(ext->ext_next->ext_namep->cname, 5995 ext->ext_namep->cname) == 0) 5996 continue; 5997 5998 /* 5999 * if we made it here, we are at a soft partition 6000 * boundary in the list. 6001 */ 6002 if (getenv(META_SP_DEBUG)) { 6003 meta_sp_debug("meta_recover_from_wm: dumping wm " 6004 "list:\n"); 6005 meta_sp_list_dump(sp_list); 6006 } 6007 6008 assert(sp_list != NULL); 6009 assert(sp_list->ext_namep != NULL); 6010 6011 if ((new_name = meta_sp_resolve_name_conflict(sp, 6012 sp_list->ext_namep, &new_np, ep)) < 0) { 6013 err = 1; 6014 goto out; 6015 } else if (new_name) { 6016 for (sp_ext = sp_list; 6017 sp_ext != NULL; 6018 sp_ext = sp_ext->ext_next) { 6019 /* 6020 * insert into the update list for 6021 * watermark update. 
6022 */ 6023 meta_sp_list_insert(sp_ext->ext_setp, 6024 new_np, &update_list, sp_ext->ext_offset, 6025 sp_ext->ext_length, sp_ext->ext_type, 6026 sp_ext->ext_seq, EXTFLG_UPDATE, 6027 meta_sp_cmp_by_offset); 6028 } 6029 6030 } 6031 if (options & MDCMD_DOIT) { 6032 /* store name in namespace */ 6033 if (mn_set) { 6034 /* send message to all nodes to return key */ 6035 md_mn_msg_addkeyname_t *send_params; 6036 int result; 6037 md_mn_result_t *resp = NULL; 6038 int message_size; 6039 6040 message_size = sizeof (*send_params) + 6041 strlen(compnp->cname) + 1; 6042 send_params = Zalloc(message_size); 6043 send_params->addkeyname_setno = sp->setno; 6044 (void) strcpy(&send_params->addkeyname_name[0], 6045 compnp->cname); 6046 result = mdmn_send_message(sp->setno, 6047 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6048 0, (char *)send_params, message_size, &resp, 6049 ep); 6050 Free(send_params); 6051 if (resp != NULL) { 6052 if (resp->mmr_exitval >= 0) { 6053 compnp->key = 6054 (mdkey_t)resp->mmr_exitval; 6055 } else { 6056 err = 1; 6057 free_result(resp); 6058 goto out; 6059 } 6060 free_result(resp); 6061 } 6062 if (result != 0) { 6063 err = 1; 6064 goto out; 6065 } 6066 (void) metanamelist_append(&keynlp, compnp); 6067 } else { 6068 if (add_key_name(sp, compnp, &keynlp, 6069 ep) != 0) { 6070 err = 1; 6071 goto out; 6072 } 6073 } 6074 } 6075 6076 /* create the unit structure */ 6077 if ((mp = meta_sp_createunit( 6078 (new_name) ? 
new_np : sp_list->ext_namep, compnp, 6079 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6080 err = 1; 6081 goto out; 6082 } 6083 6084 if (getenv(META_SP_DEBUG)) { 6085 meta_sp_debug("meta_sp_recover_from_wm: " 6086 "printing newly created unit structure"); 6087 meta_sp_printunit(mp); 6088 } 6089 6090 /* place in unit structure array */ 6091 un_array[i++] = mp; 6092 6093 /* free sp_list */ 6094 meta_sp_list_free(&sp_list); 6095 sp_list = NULL; 6096 numexts = 0; 6097 sp_length = 0LL; 6098 } 6099 6100 /* display configuration updates */ 6101 (void) printf(dgettext(TEXT_DOMAIN, 6102 "The following soft partitions were found and will be added to\n" 6103 "your metadevice configuration.\n")); 6104 (void) printf("%5s %15s %18s\n", 6105 dgettext(TEXT_DOMAIN, "Name"), 6106 dgettext(TEXT_DOMAIN, "Size"), 6107 dgettext(TEXT_DOMAIN, "No. of Extents")); 6108 for (i = 0; i < num_sps; i++) { 6109 (void) printf("%5s%lu %15llu %9d\n", "d", 6110 MD_MIN2UNIT(MD_SID(un_array[i])), 6111 un_array[i]->un_length, un_array[i]->un_numexts); 6112 } 6113 6114 if (!(options & MDCMD_DOIT)) { 6115 not_recovered = 1; 6116 goto out; 6117 } 6118 6119 /* ask user for confirmation */ 6120 (void) printf(dgettext(TEXT_DOMAIN, 6121 "WARNING: You are about to add one or more soft partition\n" 6122 "metadevices to your metadevice configuration. If there\n" 6123 "appears to be an error in the soft partition(s) displayed\n" 6124 "above, do NOT proceed with this recovery operation.\n")); 6125 (void) printf(dgettext(TEXT_DOMAIN, 6126 "Are you sure you want to do this (yes/no)? 
")); 6127 6128 (void) fflush(stdout); 6129 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6130 (strlen(yesno) == 1)) 6131 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6132 dgettext(TEXT_DOMAIN, "no")); 6133 yes = dgettext(TEXT_DOMAIN, "yes"); 6134 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6135 not_recovered = 1; 6136 goto out; 6137 } 6138 6139 /* commit records one at a time */ 6140 for (i = 0; i < num_sps; i++) { 6141 (void) memset(&set_params, 0, sizeof (set_params)); 6142 set_params.mnum = MD_SID(un_array[i]); 6143 set_params.size = (un_array[i])->c.un_size; 6144 set_params.mdp = (uintptr_t)(un_array[i]); 6145 set_params.options = 6146 meta_check_devicesize(un_array[i]->un_length); 6147 if (set_params.options == MD_CRO_64BIT) { 6148 un_array[i]->c.un_revision |= MD_64BIT_META_DEV; 6149 } else { 6150 un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV; 6151 } 6152 MD_SETDRIVERNAME(&set_params, MD_SP, 6153 MD_MIN2SET(set_params.mnum)); 6154 6155 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6156 6157 /* 6158 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6159 */ 6160 if (mn_set) { 6161 md_mn_msg_iocset_t send_params; 6162 int result; 6163 md_mn_result_t *resp = NULL; 6164 int mess_size; 6165 6166 /* 6167 * Calculate message size. 
md_mn_msg_iocset_t only 6168 * contains one extent, so increment the size to 6169 * include all extents 6170 */ 6171 mess_size = sizeof (send_params) - 6172 sizeof (mp_ext_t) + 6173 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6174 6175 send_params.iocset_params = set_params; 6176 (void) memcpy(&send_params.unit, un_array[i], 6177 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6178 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6179 result = mdmn_send_message(sp->setno, 6180 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0, 6181 (char *)&send_params, mess_size, &resp, 6182 ep); 6183 if (resp != NULL) { 6184 if (resp->mmr_exitval != 0) 6185 err = 1; 6186 free_result(resp); 6187 } 6188 if (result != 0) { 6189 err = 1; 6190 } 6191 } else { 6192 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6193 np->cname) != 0) { 6194 err = 1; 6195 } 6196 } 6197 6198 if (err == 1) { 6199 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6200 "%s: Error committing record to metadb.\n"), 6201 np->cname); 6202 goto out; 6203 } 6204 6205 /* note that we've committed a record */ 6206 if (!committed) 6207 committed = 1; 6208 6209 /* update any watermarks that need it */ 6210 if (update_list != NULL) { 6211 md_sp_t *msp; 6212 6213 /* 6214 * Check to see if we're trying to create a partition 6215 * on a mirror. If so we may have to enforce an 6216 * ownership change before writing the watermark out. 
6217 */ 6218 if (metaismeta(compnp)) { 6219 char *miscname; 6220 6221 miscname = metagetmiscname(compnp, ep); 6222 if (miscname != NULL) 6223 comp_is_mirror = (strcmp(miscname, 6224 MD_MIRROR) == 0); 6225 else 6226 comp_is_mirror = 0; 6227 } 6228 /* 6229 * If this is a MN set and the component is a mirror, 6230 * change ownership to this node in order to write the 6231 * watermarks 6232 */ 6233 if (mn_set && comp_is_mirror) { 6234 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6235 if (mm == NULL) { 6236 err = 1; 6237 goto out; 6238 } else { 6239 err = meta_mn_change_owner(&ownpar, 6240 sp->setno, 6241 meta_getminor(compnp->dev), 6242 sd->sd_mn_mynode->nd_nodeid, 6243 MD_MN_MM_PREVENT_CHANGE | 6244 MD_MN_MM_SPAWN_THREAD); 6245 if (err != 0) 6246 goto out; 6247 } 6248 } 6249 6250 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6251 err = 1; 6252 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6253 "%s: Error updating extent headers.\n"), 6254 np->cname); 6255 goto out; 6256 } 6257 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6258 err = 1; 6259 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6260 "%s: Error updating extent headers " 6261 "on disk.\n"), np->cname); 6262 goto out; 6263 } 6264 } 6265 /* 6266 * If we have changed ownership earlier and prevented any 6267 * ownership changes, we can now allow ownership changes 6268 * again. 
6269 */ 6270 if (ownpar) { 6271 (void) meta_mn_change_owner(&ownpar, sp->setno, 6272 ownpar->d.mnum, 6273 ownpar->d.owner, 6274 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6275 } 6276 } 6277 6278 /* update status of all soft partitions to OK */ 6279 minors = Zalloc(num_sps * sizeof (minor_t)); 6280 for (i = 0; i < num_sps; i++) 6281 minors[i] = MD_SID(un_array[i]); 6282 6283 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6284 if (err != 0) 6285 goto out; 6286 6287 if (options & MDCMD_PRINT) 6288 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6289 "Soft Partitions recovered from device.\n"), 6290 compnp->cname); 6291 out: 6292 /* free memory */ 6293 if (extlist != NULL) 6294 meta_sp_list_free(&extlist); 6295 if (sp_list != NULL) 6296 meta_sp_list_free(&sp_list); 6297 if (update_list != NULL) 6298 meta_sp_list_free(&update_list); 6299 if (un_array != NULL) { 6300 for (i = 0; i < num_sps; i++) 6301 Free(un_array[i]); 6302 Free(un_array); 6303 } 6304 if (minors != NULL) 6305 Free(minors); 6306 if (ownpar != NULL) 6307 Free(ownpar); 6308 (void) fflush(stdout); 6309 6310 if ((keynlp != NULL) && (committed != 1)) { 6311 /* 6312 * if we haven't committed any softparts, either because of an 6313 * error or because the user decided not to proceed, delete 6314 * namelist key for the component 6315 */ 6316 if (mn_set) { 6317 mdnamelist_t *p; 6318 6319 for (p = keynlp; (p != NULL); p = p->next) { 6320 mdname_t *np = p->namep; 6321 md_mn_msg_delkeyname_t send_params; 6322 md_mn_result_t *resp = NULL; 6323 6324 send_params.delkeyname_dev = np->dev; 6325 send_params.delkeyname_setno = sp->setno; 6326 send_params.delkeyname_key = np->key; 6327 (void) mdmn_send_message(sp->setno, 6328 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6329 0, (char *)&send_params, 6330 sizeof (send_params), 6331 &resp, ep); 6332 if (resp != NULL) { 6333 free_result(resp); 6334 } 6335 } 6336 } else { 6337 (void) del_key_names(sp, keynlp, NULL); 6338 } 6339 } 6340 6341 
metafreenamelist(keynlp); 6342 6343 if (err) 6344 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6345 6346 if (not_recovered) 6347 if (options & MDCMD_PRINT) 6348 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6349 "Soft Partitions NOT recovered from device.\n"), 6350 compnp->cname); 6351 return (0); 6352 } 6353 6354 /* 6355 * FUNCTION: meta_sp_recover_from_unit() 6356 * INPUT: sp - name of set we are recovering in 6357 * compnp - name of component we are recovering from 6358 * options - metarecover options 6359 * OUTPUT: ep - return error pointer 6360 * RETURNS: int - 0 - success, -1 - error 6361 * PURPOSE: update watermarks to match metadb records. begin by getting 6362 * a namelist representing all soft partitions on the specified 6363 * component. then, build an extlist representing the soft 6364 * partitions, filling in the freespace extents. notify user 6365 * of changes, place all soft partitions into the "recovering" 6366 * state and update the watermarks. finally, return all soft 6367 * partitions to the "OK" state. 
6368 */ 6369 static int 6370 meta_sp_recover_from_unit( 6371 mdsetname_t *sp, 6372 mdname_t *compnp, 6373 mdcmdopts_t options, 6374 md_error_t *ep 6375 ) 6376 { 6377 mdnamelist_t *spnlp = NULL; 6378 mdnamelist_t *nlp = NULL; 6379 sp_ext_node_t *ext = NULL; 6380 sp_ext_node_t *extlist = NULL; 6381 int count; 6382 char yesno[255]; 6383 char *yes; 6384 int rval = 0; 6385 minor_t *minors = NULL; 6386 int i; 6387 md_sp_t *msp; 6388 md_set_desc *sd; 6389 bool_t mn_set = 0; 6390 daddr_t start_block; 6391 6392 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6393 if (count <= 0) 6394 return (-1); 6395 6396 /* set flag if dealing with a MN set */ 6397 if (!metaislocalset(sp)) { 6398 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6399 return (-1); 6400 } 6401 if (MD_MNSET_DESC(sd)) 6402 mn_set = 1; 6403 } 6404 /* 6405 * Save the XDR unit structure for one of the soft partitions; 6406 * we'll use this later to provide metadevice context to 6407 * update the watermarks so the device can be resolved by 6408 * devid instead of dev_t. 
6409 */ 6410 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6411 metafreenamelist(spnlp); 6412 return (-1); 6413 } 6414 6415 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6416 MD_DISKADDR_ERROR) { 6417 return (-1); 6418 } 6419 6420 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6421 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6422 meta_sp_list_insert(NULL, NULL, &extlist, 6423 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6424 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6425 6426 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6427 metafreenamelist(spnlp); 6428 return (-1); 6429 } 6430 6431 assert(extlist != NULL); 6432 if ((options & MDCMD_VERBOSE) != 0) { 6433 (void) printf(dgettext(TEXT_DOMAIN, 6434 "Updating extent headers on device %s from metadb.\n\n"), 6435 compnp->cname); 6436 (void) printf(dgettext(TEXT_DOMAIN, 6437 "The following extent headers will be written:\n")); 6438 meta_sp_display_exthdr(); 6439 } 6440 6441 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6442 6443 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6444 6445 /* mark every node for updating except the reserved space */ 6446 if (ext->ext_type != EXTTYP_RESERVED) { 6447 ext->ext_flags |= EXTFLG_UPDATE; 6448 6449 /* print extent information */ 6450 if ((options & MDCMD_VERBOSE) != 0) 6451 meta_sp_display_ext(ext); 6452 } 6453 } 6454 6455 /* request verification and then update all watermarks */ 6456 if ((options & MDCMD_DOIT) != 0) { 6457 6458 (void) printf(dgettext(TEXT_DOMAIN, 6459 "\nWARNING: You are about to overwrite portions of %s\n" 6460 "with soft partition metadata. The extent headers will be\n" 6461 "written to match the existing metadb configuration. If\n" 6462 "the device was not previously setup with this\n" 6463 "configuration, data loss may result.\n\n"), 6464 compnp->cname); 6465 (void) printf(dgettext(TEXT_DOMAIN, 6466 "Are you sure you want to do this (yes/no)? 
")); 6467 6468 (void) fflush(stdout); 6469 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6470 (strlen(yesno) == 1)) 6471 (void) snprintf(yesno, sizeof (yesno), 6472 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6473 yes = dgettext(TEXT_DOMAIN, "yes"); 6474 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6475 /* place soft partitions into recovering state */ 6476 minors = Zalloc(count * sizeof (minor_t)); 6477 for (nlp = spnlp, i = 0; 6478 nlp != NULL && i < count; 6479 nlp = nlp->next, i++) { 6480 assert(nlp->namep != NULL); 6481 minors[i] = meta_getminor(nlp->namep->dev); 6482 } 6483 if (update_sp_status(sp, minors, count, 6484 MD_SP_RECOVER, mn_set, ep) != 0) { 6485 rval = -1; 6486 goto out; 6487 } 6488 6489 /* update the watermarks */ 6490 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6491 rval = -1; 6492 goto out; 6493 } 6494 6495 if (options & MDCMD_PRINT) { 6496 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6497 "Soft Partitions recovered from metadb\n"), 6498 compnp->cname); 6499 } 6500 6501 /* return soft partitions to the OK state */ 6502 if (update_sp_status(sp, minors, count, 6503 MD_SP_OK, mn_set, ep) != 0) { 6504 rval = -1; 6505 goto out; 6506 } 6507 6508 rval = 0; 6509 goto out; 6510 } 6511 } 6512 6513 if (options & MDCMD_PRINT) { 6514 (void) printf(dgettext(TEXT_DOMAIN, 6515 "%s: Soft Partitions NOT recovered from metadb\n"), 6516 compnp->cname); 6517 } 6518 6519 out: 6520 if (minors != NULL) 6521 Free(minors); 6522 metafreenamelist(spnlp); 6523 meta_sp_list_free(&extlist); 6524 (void) fflush(stdout); 6525 return (rval); 6526 } 6527 6528 6529 /* 6530 * FUNCTION: meta_sp_update_abr() 6531 * INPUT: sp - name of set we are recovering in 6532 * OUTPUT: ep - return error pointer 6533 * RETURNS: int - 0 - success, -1 - error 6534 * PURPOSE: update the ABR state for all soft partitions in the set. This 6535 * is called when joining a set. 
It sends a message to the master 6536 * node for each soft partition to get the value of tstate and 6537 * then sets ABR ,if required, by opening the sp, setting ABR 6538 * and then closing the sp. This approach is taken rather that 6539 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6540 * the case when we have another node simultaneously unsetting ABR. 6541 */ 6542 int 6543 meta_sp_update_abr( 6544 mdsetname_t *sp, 6545 md_error_t *ep 6546 ) 6547 { 6548 mdnamelist_t *devnlp = NULL; 6549 mdnamelist_t *p; 6550 mdname_t *devnp = NULL; 6551 md_unit_t *un; 6552 char fname[MAXPATHLEN]; 6553 int mnum, fd; 6554 volcap_t vc; 6555 uint_t tstate; 6556 6557 6558 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6559 return (-1); 6560 } 6561 6562 /* Exit if no soft partitions in this set */ 6563 if (devnlp == NULL) 6564 return (0); 6565 6566 /* For each soft partition */ 6567 for (p = devnlp; (p != NULL); p = p->next) { 6568 devnp = p->namep; 6569 6570 /* check if this is a top level metadevice */ 6571 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6572 goto out; 6573 if (MD_HAS_PARENT(MD_PARENT(un))) { 6574 Free(un); 6575 continue; 6576 } 6577 Free(un); 6578 6579 /* Get tstate from Master */ 6580 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6581 mdname_t *np; 6582 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6583 ep); 6584 if (np) { 6585 md_perror(dgettext(TEXT_DOMAIN, 6586 "Unable to get tstate for %s"), np->cname); 6587 } 6588 continue; 6589 } 6590 /* If not set on the master, nothing to do */ 6591 if (!(tstate & MD_ABR_CAP)) 6592 continue; 6593 6594 mnum = meta_getminor(devnp->dev); 6595 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6596 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6597 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6598 md_perror(dgettext(TEXT_DOMAIN, 6599 "Could not open device %s"), fname); 6600 continue; 6601 } 6602 6603 /* Set ABR state */ 6604 vc.vc_info = 0; 6605 vc.vc_set = 0; 6606 if (ioctl(fd, 
DKIOCGETVOLCAP, &vc) < 0) { 6607 (void) close(fd); 6608 continue; 6609 } 6610 6611 vc.vc_set = DKV_ABR_CAP; 6612 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6613 (void) close(fd); 6614 goto out; 6615 } 6616 6617 (void) close(fd); 6618 } 6619 metafreenamelist(devnlp); 6620 return (0); 6621 out: 6622 metafreenamelist(devnlp); 6623 return (-1); 6624 } 6625 6626 /* 6627 * FUNCTION: meta_mn_sp_update_abr() 6628 * INPUT: arg - Given set. 6629 * PURPOSE: update the ABR state for all soft partitions in the set by 6630 * forking a process to call meta_sp_update_abr() 6631 * This function is only called via rpc.metad when adding a node 6632 * to a set, ie this node is beong joined to the set by another 6633 * node. 6634 */ 6635 void * 6636 meta_mn_sp_update_abr(void *arg) 6637 { 6638 set_t setno = *((set_t *)arg); 6639 mdsetname_t *sp; 6640 md_error_t mde = mdnullerror; 6641 int fval; 6642 6643 /* should have a set */ 6644 assert(setno != NULL); 6645 6646 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6647 mde_perror(&mde, ""); 6648 return (NULL); 6649 } 6650 6651 if (!(meta_is_mn_set(sp, &mde))) { 6652 mde_perror(&mde, ""); 6653 return (NULL); 6654 } 6655 6656 /* fork a process */ 6657 if ((fval = md_daemonize(sp, &mde)) != 0) { 6658 /* 6659 * md_daemonize will fork off a process. The is the 6660 * parent or error. 6661 */ 6662 if (fval > 0) { 6663 return (NULL); 6664 } 6665 mde_perror(&mde, ""); 6666 return (NULL); 6667 } 6668 /* 6669 * Child process should never return back to rpc.metad, but 6670 * should exit. 6671 * Flush all internally cached data inherited from parent process 6672 * since cached data will be cleared when parent process RPC request 6673 * has completed (which is possibly before this child process 6674 * can complete). 6675 * Child process can retrieve and cache its own copy of data from 6676 * rpc.metad that won't be changed by the parent process. 
6677 * 6678 * Reset md_in_daemon since this child will be a client of rpc.metad 6679 * not part of the rpc.metad daemon itself. 6680 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6681 * this thread is rpc.metad or any other thread. (If this thread 6682 * was rpc.metad it could use some short circuit code to get data 6683 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6684 */ 6685 md_in_daemon = 0; 6686 metaflushsetname(sp); 6687 sr_cache_flush_setno(setno); 6688 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6689 mde_perror(&mde, ""); 6690 md_exit(sp, 1); 6691 } 6692 6693 6694 /* 6695 * Closing stdin/out/err here. 6696 */ 6697 (void) close(0); 6698 (void) close(1); 6699 (void) close(2); 6700 assert(fval == 0); 6701 6702 (void) meta_sp_update_abr(sp, &mde); 6703 6704 md_exit(sp, 0); 6705 /*NOTREACHED*/ 6706 return (NULL); 6707 } 6708 6709 int 6710 meta_sp_check_component( 6711 mdsetname_t *sp, 6712 mdname_t *np, 6713 md_error_t *ep 6714 ) 6715 { 6716 md_sp_t *msp; 6717 minor_t mnum = 0; 6718 md_dev64_t dev = 0; 6719 mdnm_params_t nm; 6720 md_getdevs_params_t mgd; 6721 side_t sideno; 6722 char *miscname; 6723 md_dev64_t *mydev = NULL; 6724 char *pname = NULL, *t; 6725 char *ctd_name = NULL; 6726 char *devname = NULL; 6727 int len; 6728 int rval = -1; 6729 6730 (void) memset(&nm, '\0', sizeof (nm)); 6731 if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL) 6732 return (-1); 6733 6734 if ((miscname = metagetmiscname(np, ep)) == NULL) 6735 return (-1); 6736 6737 sideno = getmyside(sp, ep); 6738 6739 meta_sp_debug("meta_sp_check_component: %s is on %s key: %d" 6740 " dev: %llu\n", 6741 np->cname, msp->compnamep->cname, msp->compnamep->key, 6742 msp->compnamep->dev); 6743 6744 /* 6745 * Now get the data from the unit structure. The compnamep stuff 6746 * contains the data from the namespace and we need the un_dev 6747 * from the unit structure. 
6748 */ 6749 (void) memset(&mgd, '\0', sizeof (mgd)); 6750 MD_SETDRIVERNAME(&mgd, miscname, sp->setno); 6751 mgd.cnt = 1; /* sp's only have one subdevice */ 6752 mgd.mnum = meta_getminor(np->dev); 6753 6754 mydev = Zalloc(sizeof (*mydev)); 6755 mgd.devs = (uintptr_t)mydev; 6756 6757 if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) { 6758 meta_sp_debug("meta_sp_check_component: ioctl failed\n"); 6759 (void) mdstealerror(ep, &mgd.mde); 6760 rval = 0; 6761 goto out; 6762 } else if (mgd.cnt <= 0) { 6763 assert(mgd.cnt >= 0); 6764 rval = 0; 6765 goto out; 6766 } 6767 6768 /* Get the devname from the name space. */ 6769 if ((devname = meta_getnmentbykey(sp->setno, sideno, 6770 msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) { 6771 meta_sp_debug("meta_sp_check_component: key %d not" 6772 "found\n", msp->compnamep->key); 6773 goto out; 6774 } 6775 6776 meta_sp_debug("dev %s from component: (%lu, %lu)\n", 6777 devname, 6778 meta_getmajor(*mydev), 6779 meta_getminor(*mydev)); 6780 meta_sp_debug("minor from the namespace: %lu\n", mnum); 6781 6782 if (mnum != meta_getminor(*mydev)) { 6783 /* 6784 * The minor numbers are different. Update the namespace 6785 * with the information from the component. 
6786 */ 6787 6788 t = strrchr(devname, '/'); 6789 t++; 6790 ctd_name = Strdup(t); 6791 6792 meta_sp_debug("meta_sp_check_component: ctd_name: %s\n", 6793 ctd_name); 6794 6795 len = strlen(devname); 6796 t = strrchr(devname, '/'); 6797 t++; 6798 pname = Zalloc((len - strlen(t)) + 1); 6799 (void) strncpy(pname, devname, (len - strlen(t))); 6800 meta_sp_debug("pathname: %s\n", pname); 6801 6802 meta_sp_debug("updating the minor number to %lu\n", nm.mnum); 6803 6804 if (meta_update_namespace(sp->setno, sideno, 6805 ctd_name, *mydev, msp->compnamep->key, pname, 6806 ep) != 0) { 6807 goto out; 6808 } 6809 } 6810 out: 6811 if (pname != NULL) 6812 Free(pname); 6813 if (ctd_name != NULL) 6814 Free(ctd_name); 6815 if (devname != NULL) 6816 Free(devname); 6817 if (mydev != NULL) 6818 Free(mydev); 6819 return (rval); 6820 } 6821