1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Just in case we're not in a build environment, make sure that 31 * TEXT_DOMAIN gets set to something. 32 */ 33 #if !defined(TEXT_DOMAIN) 34 #define TEXT_DOMAIN "SYS_TEST" 35 #endif 36 37 /* 38 * soft partition operations 39 * 40 * Soft Partitions provide a virtual disk mechanism which is used to 41 * divide a large volume into many small pieces, each appearing as a 42 * separate device. A soft partition consists of a series of extents, 43 * each having an offset and a length. The extents are logically 44 * contiguous, so where the first extent leaves off the second extent 45 * picks up. Which extent a given "virtual offset" belongs to is 46 * dependent on the size of all the previous extents in the soft 47 * partition. 
48 * 49 * Soft partitions are represented in memory by an extent node 50 * (sp_ext_node_t) which contains all of the information necessary to 51 * create a unit structure and update the on-disk format, called 52 * "watermarks". These extent nodes are typically kept in a doubly 53 * linked list and are manipulated by list manipulation routines. A 54 * list of extents may represent all of the soft partitions on a volume, 55 * a single soft partition, or perhaps just a set of extents that need 56 * to be updated. Extent lists may be sorted by offset or by name/seq#, 57 * depending on which compare function is used. Most of the routines 58 * require the list be sorted by offset to work, and that's the typical 59 * configuration. 60 * 61 * In order to do an allocation, knowledge of all soft partitions on the 62 * volume is required. Then free space is determined from the space 63 * that is not allocated, and new allocations can be made from the free 64 * space. Once the new allocations are made, a unit structure is created 65 * and the watermarks are updated. The status is then changed to "okay" 66 * on the unit structure to commit the transaction. If updating the 67 * watermarks fails, the unit structure is in an intermediate state and 68 * the driver will not allow access to the device. 69 * 70 * A typical sequence of events is: 71 * 1. Fetch the list of names for all soft partitions on a volume 72 * meta_sp_get_by_component() 73 * 2. Construct an extent list from the name list 74 * meta_sp_extlist_from_namelist() 75 * 3. Fill the gaps in the extent list with free extents 76 * meta_sp_list_freefill() 77 * 4. Allocate from the free extents 78 * meta_sp_alloc_by_len() 79 * meta_sp_alloc_by_list() 80 * 5. Create the unit structure from the extent list 81 * meta_sp_createunit() 82 * meta_sp_updateunit() 83 * 6. Write out the watermarks 84 * meta_sp_update_wm() 85 * 7. 
Set the status to "Okay" 86 * meta_sp_setstatus() 87 * 88 */ 89 90 #include <stdio.h> 91 #include <meta.h> 92 #include "meta_repartition.h" 93 #include <sys/lvm/md_sp.h> 94 #include <sys/lvm/md_crc.h> 95 #include <strings.h> 96 #include <sys/lvm/md_mirror.h> 97 #include <sys/bitmap.h> 98 99 extern int md_in_daemon; 100 101 typedef struct sp_ext_node { 102 struct sp_ext_node *ext_next; /* next element */ 103 struct sp_ext_node *ext_prev; /* previous element */ 104 sp_ext_type_t ext_type; /* type of extent */ 105 sp_ext_offset_t ext_offset; /* starting offset */ 106 sp_ext_length_t ext_length; /* length of this node */ 107 uint_t ext_flags; /* extent flags */ 108 uint32_t ext_seq; /* watermark seq no */ 109 mdname_t *ext_namep; /* name pointer */ 110 mdsetname_t *ext_setp; /* set pointer */ 111 } sp_ext_node_t; 112 113 /* extent flags */ 114 #define EXTFLG_UPDATE (1) 115 116 /* Extent node compare function for list sorting */ 117 typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *); 118 119 120 /* Function Prototypes */ 121 122 /* Debugging Functions */ 123 static void meta_sp_debug(char *format, ...); 124 static void meta_sp_printunit(mp_unit_t *mp); 125 126 /* Misc Support Functions */ 127 int meta_sp_parsesize(char *s, sp_ext_length_t *szp); 128 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp); 129 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp, 130 md_error_t *ep); 131 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp, 132 mdnamelist_t **nlpp, int force, md_error_t *ep); 133 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp, 134 mdname_t *compnp, md_error_t *ep); 135 136 /* Extent List Manipulation Functions */ 137 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2); 138 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2); 139 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np, 140 sp_ext_node_t **head, sp_ext_offset_t 
offset, sp_ext_length_t length, 141 sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare); 142 static void meta_sp_list_free(sp_ext_node_t **head); 143 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext); 144 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head, 145 sp_ext_type_t exttype, int exclude_wm); 146 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head, 147 sp_ext_offset_t offset); 148 static void meta_sp_list_freefill(sp_ext_node_t **extlist, 149 sp_ext_length_t size); 150 static void meta_sp_list_dump(sp_ext_node_t *head); 151 static int meta_sp_list_overlaps(sp_ext_node_t *head); 152 153 /* Extent List Query Functions */ 154 static boolean_t meta_sp_enough_space(int desired_number_of_sps, 155 blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp, 156 sp_ext_length_t alignment); 157 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep, 158 mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp, 159 md_error_t *ep); 160 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep, 161 mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp); 162 163 164 /* Extent Allocation Functions */ 165 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np, 166 sp_ext_node_t **extlist, sp_ext_node_t *free_ext, 167 sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq); 168 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np, 169 sp_ext_node_t **extlist, sp_ext_length_t *lp, 170 sp_ext_offset_t last_off, sp_ext_length_t alignment); 171 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np, 172 sp_ext_node_t **extlist, sp_ext_node_t *oblist); 173 174 /* Extent List Population Functions */ 175 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp, 176 sp_ext_node_t **extlist, md_error_t *ep); 177 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp, 178 sp_ext_node_t **extlist, ext_cmpfunc_t compare, 
md_error_t *ep); 179 180 /* Print (metastat) Functions */ 181 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp, 182 mdprtopts_t options, md_error_t *ep); 183 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate); 184 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp, 185 char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep); 186 187 /* Watermark Manipulation Functions */ 188 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp, 189 sp_ext_node_t *extlist, md_error_t *ep); 190 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep); 191 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp, 192 mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep); 193 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp, 194 md_error_t *ep); 195 196 /* Unit Structure Manipulation Functions */ 197 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist); 198 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp, 199 sp_ext_node_t *extlist, int numexts, sp_ext_length_t len, 200 sp_status_t status, md_error_t *ep); 201 static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un, 202 sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts, 203 md_error_t *ep); 204 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist, 205 mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep); 206 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options, 207 int *repart_options, md_error_t *ep); 208 209 /* Reset (metaclear) Functions */ 210 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp, 211 md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep); 212 213 /* Recovery (metarecover) Functions */ 214 static void meta_sp_display_exthdr(void); 215 static void meta_sp_display_ext(sp_ext_node_t *ext); 216 static int meta_sp_checkseq(sp_ext_node_t 
*extlist); 217 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *, 218 mdname_t **, md_error_t *); 219 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np, 220 mdcmdopts_t options, md_error_t *ep); 221 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp, 222 mdcmdopts_t options, md_error_t *ep); 223 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np, 224 mdcmdopts_t options, md_error_t *ep); 225 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext, 226 sp_ext_node_t *unitext, md_error_t *ep); 227 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp, 228 mdcmdopts_t options, md_error_t *ep); 229 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np, 230 mdcmdopts_t options, md_error_t *ep); 231 232 /* 233 * Private Constants 234 */ 235 236 static const int FORCE_RELOAD_CACHE = 1; 237 static const uint_t NO_FLAGS = 0; 238 static const sp_ext_offset_t NO_OFFSET = 0ULL; 239 static const uint_t NO_SEQUENCE_NUMBER = 0; 240 static const int ONE_SOFT_PARTITION = 1; 241 242 static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)]; 243 244 #define TEST_SOFT_PARTITION_NAMEP NULL 245 #define TEST_SETNAMEP NULL 246 247 #define EXCLUDE_WM (1) 248 #define INCLUDE_WM (0) 249 250 #define SP_UNALIGNED (0LL) 251 252 /* 253 * ************************************************************************** 254 * Debugging Functions * 255 * ************************************************************************** 256 */ 257 258 /*PRINTFLIKE1*/ 259 static void 260 meta_sp_debug(char *format, ...) 261 { 262 static int debug; 263 static int debug_set = 0; 264 va_list ap; 265 266 if (!debug_set) { 267 debug = getenv(META_SP_DEBUG) ? 
1 : 0; 268 debug_set = 1; 269 } 270 271 if (debug) { 272 va_start(ap, format); 273 (void) vfprintf(stderr, format, ap); 274 va_end(ap); 275 } 276 } 277 278 static void 279 meta_sp_printunit(mp_unit_t *mp) 280 { 281 int i; 282 283 if (mp == NULL) 284 return; 285 286 /* print the common fields we know about */ 287 (void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type); 288 (void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size); 289 (void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp)); 290 291 /* sp-specific fields */ 292 (void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status); 293 (void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts); 294 (void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length); 295 (void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev); 296 (void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev); 297 (void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key); 298 299 /* print extent information */ 300 (void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n"); 301 for (i = 0; i < mp->un_numexts; i++) { 302 (void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i, 303 mp->un_ext[i].un_voff, mp->un_ext[i].un_poff, 304 mp->un_ext[i].un_len); 305 } 306 } 307 308 /* 309 * FUNCTION: meta_sp_parsesize() 310 * INPUT: s - the string to parse 311 * OUTPUT: *szp - disk block count (0 for "all") 312 * RETURNS: -1 for error, 0 for success 313 * PURPOSE: parses the command line parameter that specifies the 314 * requested size of a soft partition. The input string 315 * is either the literal "all" or a numeric value 316 * followed by a single character, b for disk blocks, k 317 * for kilobytes, m for megabytes, g for gigabytes, or t 318 * for terabytes. p for petabytes and e for exabytes 319 * have been added as undocumented features for future 320 * expansion. For example, 100m is 100 megabytes, while 321 * 50g is 50 gigabytes. All values are rounded up to the 322 * nearest block size. 
323 */ 324 int 325 meta_sp_parsesize(char *s, sp_ext_length_t *szp) 326 { 327 if (s == NULL || szp == NULL) { 328 return (-1); 329 } 330 331 /* Check for literal "all" */ 332 if (strcasecmp(s, "all") == 0) { 333 *szp = 0; 334 return (0); 335 } 336 337 return (meta_sp_parsesizestring(s, szp)); 338 } 339 340 /* 341 * FUNCTION: meta_sp_parsesizestring() 342 * INPUT: s - the string to parse 343 * OUTPUT: *szp - disk block count 344 * RETURNS: -1 for error, 0 for success 345 * PURPOSE: parses a string that specifies size. The input string is a 346 * numeric value followed by a single character, b for disk blocks, 347 * k for kilobytes, m for megabytes, g for gigabytes, or t for 348 * terabytes. p for petabytes and e for exabytes have been added 349 * as undocumented features for future expansion. For example, 350 * 100m is 100 megabytes, while 50g is 50 gigabytes. All values 351 * are rounded up to the nearest block size. 352 */ 353 static int 354 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp) 355 { 356 sp_ext_length_t len = 0; 357 char len_type[2]; 358 359 if (s == NULL || szp == NULL) { 360 return (-1); 361 } 362 363 /* 364 * make sure block offset does not overflow 2^64 bytes. 
365 */ 366 if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) || 367 (len == 0LL) || 368 (len > (1LL << (64 - DEV_BSHIFT)))) 369 return (-1); 370 371 switch (len_type[0]) { 372 case 'B': 373 case 'b': 374 len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE)); 375 break; 376 case 'K': 377 case 'k': 378 len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE)); 379 break; 380 case 'M': 381 case 'm': 382 len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE)); 383 break; 384 case 'g': 385 case 'G': 386 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE)); 387 break; 388 case 't': 389 case 'T': 390 len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL, 391 DEV_BSIZE)); 392 break; 393 case 'p': 394 case 'P': 395 len = lbtodb(roundup( 396 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 397 DEV_BSIZE)); 398 break; 399 case 'e': 400 case 'E': 401 len = lbtodb(roundup( 402 len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL, 403 DEV_BSIZE)); 404 break; 405 default: 406 /* error */ 407 return (-1); 408 } 409 410 *szp = len; 411 return (0); 412 } 413 414 /* 415 * FUNCTION: meta_sp_setgeom() 416 * INPUT: np - the underlying device to setup geometry for 417 * compnp - the underlying device to setup geometry for 418 * mp - the unit structure to set the geometry for 419 * OUTPUT: ep - return error pointer 420 * RETURNS: int - -1 if error, 0 otherwise 421 * PURPOSE: establishes geometry information for a device 422 */ 423 static int 424 meta_sp_setgeom( 425 mdname_t *np, 426 mdname_t *compnp, 427 mp_unit_t *mp, 428 md_error_t *ep 429 ) 430 { 431 mdgeom_t *geomp; 432 uint_t round_cyl = 0; 433 434 if ((geomp = metagetgeom(compnp, ep)) == NULL) 435 return (-1); 436 if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct, 437 geomp->read_reinstruct, round_cyl, ep) != 0) 438 return (-1); 439 440 return (0); 441 } 442 443 /* 444 * FUNCTION: meta_sp_setstatus() 445 * INPUT: sp - the set name for the devices to set the status on 446 * minors - an array of minor 
numbers of devices to set status on 447 * num_units - number of entries in the array 448 * status - status value to set all units to 449 * OUTPUT: ep - return error pointer 450 * RETURNS: int - -1 if error, 0 success 451 * PURPOSE: sets the status of one or more soft partitions to the 452 * requested value 453 */ 454 int 455 meta_sp_setstatus( 456 mdsetname_t *sp, 457 minor_t *minors, 458 int num_units, 459 sp_status_t status, 460 md_error_t *ep 461 ) 462 { 463 md_sp_statusset_t status_params; 464 465 assert(minors != NULL); 466 467 /* update status of all soft partitions to the status passed in */ 468 (void) memset(&status_params, 0, sizeof (status_params)); 469 status_params.num_units = num_units; 470 status_params.new_status = status; 471 status_params.size = num_units * sizeof (minor_t); 472 status_params.minors = (uintptr_t)minors; 473 MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno); 474 if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde, 475 NULL) != 0) { 476 (void) mdstealerror(ep, &status_params.mde); 477 return (-1); 478 } 479 return (0); 480 } 481 482 /* 483 * FUNCTION: meta_get_sp_names() 484 * INPUT: sp - the set name to get soft partitions from 485 * options - options from the command line 486 * OUTPUT: nlpp - list of all soft partition names 487 * ep - return error pointer 488 * RETURNS: int - -1 if error, 0 success 489 * PURPOSE: returns a list of all soft partitions in the metadb 490 * for all devices in the specified set 491 */ 492 int 493 meta_get_sp_names( 494 mdsetname_t *sp, 495 mdnamelist_t **nlpp, 496 int options, 497 md_error_t *ep 498 ) 499 { 500 return (meta_get_names(MD_SP, sp, nlpp, options, ep)); 501 } 502 503 /* 504 * FUNCTION: meta_get_by_component() 505 * INPUT: sp - the set name to get soft partitions from 506 * compnp - the name of the device containing the soft 507 * partitions that will be returned 508 * force - 0 - reads cached namelist if available, 509 * 1 - reloads cached namelist, frees old namelist 510 * 
OUTPUT: nlpp - list of all soft partition names 511 * ep - return error pointer 512 * RETURNS: int - -1 error, otherwise the number of soft partitions 513 * found on the component (0 = none found). 514 * PURPOSE: returns a list of all soft partitions on a given device 515 * from the metadb information 516 */ 517 static int 518 meta_sp_get_by_component( 519 mdsetname_t *sp, 520 mdname_t *compnp, 521 mdnamelist_t **nlpp, 522 int force, 523 md_error_t *ep 524 ) 525 { 526 static mdnamelist_t *cached_list = NULL; /* cached namelist */ 527 static int cached_count = 0; /* cached count */ 528 mdnamelist_t *spnlp = NULL; /* all sp names */ 529 mdnamelist_t *namep; /* list iterator */ 530 mdnamelist_t **tailpp = nlpp; /* namelist tail */ 531 mdnamelist_t **cachetailpp; /* cache tail */ 532 md_sp_t *msp; /* unit structure */ 533 int count = 0; /* count of sp's */ 534 int err; 535 mdname_t *curnp; 536 537 if ((cached_list != NULL) && (!force)) { 538 /* return a copy of the cached list */ 539 for (namep = cached_list; namep != NULL; namep = namep->next) 540 tailpp = meta_namelist_append_wrapper(tailpp, 541 namep->namep); 542 return (cached_count); 543 } 544 545 /* free the cache and reset values to zeros to prepare for a new list */ 546 metafreenamelist(cached_list); 547 cached_count = 0; 548 cached_list = NULL; 549 cachetailpp = &cached_list; 550 *nlpp = NULL; 551 552 /* get all the softpartitions first of all */ 553 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 554 return (-1); 555 556 /* 557 * Now for each sp, see if it resides on the component we 558 * are interested in, if so then add it to our list 559 */ 560 for (namep = spnlp; namep != NULL; namep = namep->next) { 561 curnp = namep->namep; 562 563 /* get the unit structure */ 564 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 565 continue; 566 567 /* 568 * If the current soft partition is not on the same 569 * component, continue the search. If it is on the same 570 * component, add it to our namelist. 
571 */ 572 err = meta_check_samedrive(compnp, msp->compnamep, ep); 573 if (err <= 0) { 574 /* not on the same device, check the next one */ 575 continue; 576 } 577 578 /* it's on the same drive */ 579 580 /* 581 * Check for overlapping partitions if the component is not 582 * a metadevice. 583 */ 584 if (!metaismeta(msp->compnamep)) { 585 /* 586 * if they're on the same drive, neither 587 * should be a metadevice if one isn't 588 */ 589 assert(!metaismeta(compnp)); 590 591 if (meta_check_overlap(msp->compnamep->cname, 592 compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0) 593 continue; 594 595 /* in this case it's not an error for them to overlap */ 596 mdclrerror(ep); 597 } 598 599 /* Component is on the same device, add to the used list */ 600 tailpp = meta_namelist_append_wrapper(tailpp, curnp); 601 cachetailpp = meta_namelist_append_wrapper(cachetailpp, 602 curnp); 603 604 ++count; 605 ++cached_count; 606 } 607 608 assert(count == cached_count); 609 return (count); 610 611 out: 612 metafreenamelist(*nlpp); 613 *nlpp = NULL; 614 return (-1); 615 } 616 617 /* 618 * FUNCTION: meta_sp_get_default_alignment() 619 * INPUT: sp - the pertinent set name 620 * compnp - the name of the underlying component 621 * OUTPUT: ep - return error pointer 622 * RETURNS: sp_ext_length_t =0: no default alignment 623 * >0: default alignment 624 * PURPOSE: returns the default alignment for soft partitions to 625 * be built on top of the specified component or 626 * metadevice 627 */ 628 static sp_ext_length_t 629 meta_sp_get_default_alignment( 630 mdsetname_t *sp, 631 mdname_t *compnp, 632 md_error_t *ep 633 ) 634 { 635 sp_ext_length_t a = SP_UNALIGNED; 636 char *mname; 637 638 assert(compnp != NULL); 639 640 /* 641 * We treat raw devices as opaque, and assume nothing about 642 * their alignment requirements. 
643 */ 644 if (!metaismeta(compnp)) 645 return (SP_UNALIGNED); 646 647 /* 648 * We already know it's a metadevice from the previous test; 649 * metagetmiscname() will tell us which metadevice type we 650 * have 651 */ 652 mname = metagetmiscname(compnp, ep); 653 if (mname == NULL) 654 goto out; 655 656 /* 657 * For a mirror, we want to deal with the stripe that is the 658 * primary side. If it happens to be asymmetrically 659 * configured, there is no simple way to fake a universal 660 * alignment. There's a chance that the least common 661 * denominator of the set of interlaces from all stripes of 662 * all submirrors would do it, but nobody that really cared 663 * that much about this issue would create an asymmetric 664 * config to start with. 665 * 666 * If the component underlying the soft partition is a mirror, 667 * then at the exit of this loop, compnp will have been 668 * updated to describe the first active submirror. 669 */ 670 if (strcmp(mname, MD_MIRROR) == 0) { 671 md_mirror_t *mp; 672 int smi; 673 md_submirror_t *smp; 674 675 mp = meta_get_mirror(sp, compnp, ep); 676 if (mp == NULL) 677 goto out; 678 679 for (smi = 0; smi < NMIRROR; smi++) { 680 681 smp = &mp->submirrors[smi]; 682 if (smp->state == SMS_UNUSED) 683 continue; 684 685 compnp = smp->submirnamep; 686 assert(compnp != NULL); 687 688 mname = metagetmiscname(compnp, ep); 689 if (mname == NULL) 690 goto out; 691 692 break; 693 } 694 695 if (smi == NMIRROR) 696 goto out; 697 } 698 699 /* 700 * Handle stripes and submirrors identically; just return the 701 * interlace of the first row. 702 */ 703 if (strcmp(mname, MD_STRIPE) == 0) { 704 md_stripe_t *stp; 705 706 stp = meta_get_stripe(sp, compnp, ep); 707 if (stp == NULL) 708 goto out; 709 710 a = stp->rows.rows_val[0].interlace; 711 goto out; 712 } 713 714 /* 715 * Raid is even more straightforward; the interlace applies to 716 * the entire device. 
717 */ 718 if (strcmp(mname, MD_RAID) == 0) { 719 md_raid_t *rp; 720 721 rp = meta_get_raid(sp, compnp, ep); 722 if (rp == NULL) 723 goto out; 724 725 a = rp->interlace; 726 goto out; 727 } 728 729 /* 730 * If we have arrived here with the alignment still not set, 731 * then we expect the error to have been set by one of the 732 * routines we called. If neither is the case, something has 733 * really gone wrong above. (Probably the submirror walk 734 * failed to produce a valid submirror, but that would be 735 * really bad...) 736 */ 737 out: 738 meta_sp_debug("meta_sp_get_default_alignment: miscname %s, " 739 "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a); 740 741 if (getenv(META_SP_DEBUG) && !mdisok(ep)) { 742 mde_perror(ep, NULL); 743 } 744 745 assert((a > 0) || (!mdisok(ep))); 746 747 return (a); 748 } 749 750 751 752 /* 753 * FUNCTION: meta_check_insp() 754 * INPUT: sp - the set name for the device to check 755 * np - the name of the device to check 756 * slblk - the starting offset of the device to check 757 * nblks - the number of blocks in the device to check 758 * OUTPUT: ep - return error pointer 759 * RETURNS: int - 0 - device contains soft partitions 760 * -1 - device does not contain soft partitions 761 * PURPOSE: determines whether a device contains any soft partitions 762 */ 763 /* ARGSUSED */ 764 int 765 meta_check_insp( 766 mdsetname_t *sp, 767 mdname_t *np, 768 diskaddr_t slblk, 769 diskaddr_t nblks, 770 md_error_t *ep 771 ) 772 { 773 mdnamelist_t *spnlp = NULL; /* soft partition name list */ 774 int count; 775 int rval; 776 777 /* check set pointer */ 778 assert(sp != NULL); 779 780 /* find all soft partitions on the component */ 781 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 782 783 if (count == -1) { 784 rval = -1; 785 } else if (count > 0) { 786 rval = mduseerror(ep, MDE_ALREADY, np->dev, 787 spnlp->namep->cname, np->cname); 788 } else { 789 rval = 0; 790 } 791 792 metafreenamelist(spnlp); 793 return (rval); 794 } 795 
796 /* 797 * ************************************************************************** 798 * Extent List Manipulation Functions * 799 * ************************************************************************** 800 */ 801 802 /* 803 * FUNCTION: meta_sp_cmp_by_nameseq() 804 * INPUT: e1 - first node to compare 805 * e2 - second node to compare 806 * OUTPUT: none 807 * RETURNS: int - =0 - nodes are equal 808 * <0 - e1 should go before e2 809 * >0 - e1 should go after e2 810 * PURPOSE: used for sorted list inserts to build a list sorted by 811 * name first and sequence number second. 812 */ 813 static int 814 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2) 815 { 816 int rval; 817 818 if (e1->ext_namep == NULL) 819 return (1); 820 if (e2->ext_namep == NULL) 821 return (-1); 822 if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0) 823 return (rval); 824 825 /* the names are equal, compare sequence numbers */ 826 if (e1->ext_seq > e2->ext_seq) 827 return (1); 828 if (e1->ext_seq < e2->ext_seq) 829 return (-1); 830 /* sequence numbers are also equal */ 831 return (0); 832 } 833 834 /* 835 * FUNCTION: meta_sp_cmp_by_offset() 836 * INPUT: e1 - first node to compare 837 * e2 - second node to compare 838 * OUTPUT: none 839 * RETURNS: int - =0 - nodes are equal 840 * <0 - e1 should go before e2 841 * >0 - e1 should go after e2 842 * PURPOSE: used for sorted list inserts to build a list sorted by offset 843 */ 844 static int 845 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2) 846 { 847 if (e1->ext_offset > e2->ext_offset) 848 return (1); 849 if (e1->ext_offset < e2->ext_offset) 850 return (-1); 851 /* offsets are equal */ 852 return (0); 853 } 854 855 /* 856 * FUNCTION: meta_sp_list_insert() 857 * INPUT: sp - the set name for the device the node belongs to 858 * np - the name of the device the node belongs to 859 * head - the head of the list, must be NULL for empty list 860 * offset - the physical offset of this extent in sectors 861 * 
length - the length of this extent in sectors 862 * type - the type of the extent being inserted 863 * seq - the sequence number of the extent being inserted 864 * flags - extent flags (eg. whether it needs to be updated) 865 * compare - the compare function to use 866 * OUTPUT: head - points to the new head if a node was inserted 867 * at the beginning 868 * RETURNS: void 869 * PURPOSE: inserts an extent node into a sorted doubly linked list. 870 * The sort order is determined by the compare function. 871 * Memory is allocated for the node in this function and it 872 * is up to the caller to free it, possibly using 873 * meta_sp_list_free(). If a node is inserted at the 874 * beginning of the list, the head pointer is updated to 875 * point to the new first node. 876 */ 877 static void 878 meta_sp_list_insert( 879 mdsetname_t *sp, 880 mdname_t *np, 881 sp_ext_node_t **head, 882 sp_ext_offset_t offset, 883 sp_ext_length_t length, 884 sp_ext_type_t type, 885 uint_t seq, 886 uint_t flags, 887 ext_cmpfunc_t compare 888 ) 889 { 890 sp_ext_node_t *newext; 891 sp_ext_node_t *curext; 892 893 assert(head != NULL); 894 895 /* Don't bother adding zero length nodes */ 896 if (length == 0ULL) 897 return; 898 899 /* allocate and fill in new ext_node */ 900 newext = Zalloc(sizeof (sp_ext_node_t)); 901 902 newext->ext_offset = offset; 903 newext->ext_length = length; 904 newext->ext_flags = flags; 905 newext->ext_type = type; 906 newext->ext_seq = seq; 907 newext->ext_setp = sp; 908 newext->ext_namep = np; 909 910 /* first node in the list */ 911 if (*head == NULL) { 912 newext->ext_next = newext->ext_prev = NULL; 913 *head = newext; 914 } else if ((*compare)(*head, newext) >= 0) { 915 /* the first node has a bigger offset, so insert before it */ 916 assert((*head)->ext_prev == NULL); 917 918 newext->ext_prev = NULL; 919 newext->ext_next = *head; 920 (*head)->ext_prev = newext; 921 *head = newext; 922 } else { 923 /* 924 * find the next node whose offset is greater than 925 * the 
one we want to insert, or the end of the list. 926 */ 927 for (curext = *head; 928 (curext->ext_next != NULL) && 929 ((*compare)(curext->ext_next, newext) < 0); 930 (curext = curext->ext_next)) 931 ; 932 933 /* link the new node in after the current node */ 934 newext->ext_next = curext->ext_next; 935 newext->ext_prev = curext; 936 937 if (curext->ext_next != NULL) 938 curext->ext_next->ext_prev = newext; 939 940 curext->ext_next = newext; 941 } 942 } 943 944 /* 945 * FUNCTION: meta_sp_list_free() 946 * INPUT: head - the head of the list, must be NULL for empty list 947 * OUTPUT: head - points to NULL on return 948 * RETURNS: void 949 * PURPOSE: walks a double linked extent list and frees each node 950 */ 951 static void 952 meta_sp_list_free(sp_ext_node_t **head) 953 { 954 sp_ext_node_t *ext; 955 sp_ext_node_t *next; 956 957 assert(head != NULL); 958 959 ext = *head; 960 while (ext) { 961 next = ext->ext_next; 962 Free(ext); 963 ext = next; 964 } 965 *head = NULL; 966 } 967 968 /* 969 * FUNCTION: meta_sp_list_remove() 970 * INPUT: head - the head of the list, must be NULL for empty list 971 * ext - the extent to remove, must be a member of the list 972 * OUTPUT: head - points to the new head of the list 973 * RETURNS: void 974 * PURPOSE: unlinks the node specified by ext from the list and 975 * frees it, possibly moving the head pointer forward if 976 * the head is the node being removed. 
977 */ 978 static void 979 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext) 980 { 981 assert(head != NULL); 982 assert(*head != NULL); 983 984 if (*head == ext) 985 *head = ext->ext_next; 986 987 if (ext->ext_prev != NULL) 988 ext->ext_prev->ext_next = ext->ext_next; 989 if (ext->ext_next != NULL) 990 ext->ext_next->ext_prev = ext->ext_prev; 991 Free(ext); 992 } 993 994 /* 995 * FUNCTION: meta_sp_list_size() 996 * INPUT: head - the head of the list, must be NULL for empty list 997 * exttype - the type of the extents to sum 998 * exclude_wm - subtract space for extent headers from total 999 * OUTPUT: none 1000 * RETURNS: sp_ext_length_t - the sum of all of the lengths 1001 * PURPOSE: sums the lengths of all extents in the list matching the 1002 * specified type. This could be used for computing the 1003 * amount of free or used space, for example. 1004 */ 1005 static sp_ext_length_t 1006 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm) 1007 { 1008 sp_ext_node_t *ext; 1009 sp_ext_length_t size = 0LL; 1010 1011 for (ext = head; ext != NULL; ext = ext->ext_next) 1012 if (ext->ext_type == exttype) 1013 size += ext->ext_length - 1014 ((exclude_wm) ? MD_SP_WMSIZE : 0); 1015 1016 return (size); 1017 } 1018 1019 /* 1020 * FUNCTION: meta_sp_list_find() 1021 * INPUT: head - the head of the list, must be NULL for empty list 1022 * offset - the offset contained by the node to find 1023 * OUTPUT: none 1024 * RETURNS: sp_ext_node_t * - the node containing the requested offset 1025 * or NULL if no such nodes were found. 1026 * PURPOSE: finds a node in a list containing the requested offset 1027 * (inclusive). If multiple nodes contain this offset then 1028 * only the first will be returned, though typically these 1029 * lists are managed with non-overlapping nodes. 
1030 * 1031 * *The list MUST be sorted by offset for this function to work.* 1032 */ 1033 static sp_ext_node_t * 1034 meta_sp_list_find( 1035 sp_ext_node_t *head, 1036 sp_ext_offset_t offset 1037 ) 1038 { 1039 sp_ext_node_t *ext; 1040 1041 for (ext = head; ext != NULL; ext = ext->ext_next) { 1042 /* check if the offset lies within this extent */ 1043 if ((offset >= ext->ext_offset) && 1044 (offset < ext->ext_offset + ext->ext_length)) { 1045 /* 1046 * the requested extent should always be a 1047 * subset of an extent in the list. 1048 */ 1049 return (ext); 1050 } 1051 } 1052 return (NULL); 1053 } 1054 1055 /* 1056 * FUNCTION: meta_sp_list_freefill() 1057 * INPUT: head - the head of the list, must be NULL for empty list 1058 * size - the size of the volume this extent list is 1059 * representing 1060 * OUTPUT: head - the new head of the list 1061 * RETURNS: void 1062 * PURPOSE: finds gaps in the extent list and fills them with a free 1063 * node. If there is a gap at the beginning the head 1064 * pointer will be changed to point to the new free node. 1065 * If there is free space at the end, the last free extent 1066 * will extend all the way out to the size specified. 
1067 * 1068 * *The list MUST be sorted by offset for this function to work.* 1069 */ 1070 static void 1071 meta_sp_list_freefill( 1072 sp_ext_node_t **head, 1073 sp_ext_length_t size 1074 ) 1075 { 1076 sp_ext_node_t *ext; 1077 sp_ext_offset_t curoff = 0LL; 1078 1079 for (ext = *head; ext != NULL; ext = ext->ext_next) { 1080 if (curoff < ext->ext_offset) 1081 meta_sp_list_insert(NULL, NULL, head, 1082 curoff, ext->ext_offset - curoff, 1083 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1084 curoff = ext->ext_offset + ext->ext_length; 1085 } 1086 1087 /* pad inverse list out to the end */ 1088 if (curoff < size) 1089 meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff, 1090 EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset); 1091 1092 if (getenv(META_SP_DEBUG)) { 1093 meta_sp_debug("meta_sp_list_freefill: Extent list with " 1094 "holes freefilled:\n"); 1095 meta_sp_list_dump(*head); 1096 } 1097 } 1098 1099 /* 1100 * FUNCTION: meta_sp_list_dump() 1101 * INPUT: head - the head of the list, must be NULL for empty list 1102 * OUTPUT: none 1103 * RETURNS: void 1104 * PURPOSE: dumps the entire extent list to stdout for easy debugging 1105 */ 1106 static void 1107 meta_sp_list_dump(sp_ext_node_t *head) 1108 { 1109 sp_ext_node_t *ext; 1110 1111 meta_sp_debug("meta_sp_list_dump: dumping extent list:\n"); 1112 meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name", 1113 "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev", 1114 "Next"); 1115 for (ext = head; ext != NULL; ext = ext->ext_next) { 1116 if (ext->ext_namep != NULL) 1117 meta_sp_debug("%5s", ext->ext_namep->cname); 1118 else 1119 meta_sp_debug("%5s", "NONE"); 1120 1121 meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq); 1122 switch (ext->ext_type) { 1123 case EXTTYP_ALLOC: 1124 meta_sp_debug("%7s ", "ALLOC"); 1125 break; 1126 case EXTTYP_FREE: 1127 meta_sp_debug("%7s ", "FREE"); 1128 break; 1129 case EXTTYP_END: 1130 meta_sp_debug("%7s ", "END"); 1131 break; 1132 case EXTTYP_RESERVED: 1133 
meta_sp_debug("%7s ", "RESV"); 1134 break; 1135 default: 1136 meta_sp_debug("%7s ", "INVLD"); 1137 break; 1138 } 1139 1140 meta_sp_debug("%10llu %10llu %5u %10p %10p\n", 1141 ext->ext_offset, ext->ext_length, 1142 ext->ext_flags, (void *) ext->ext_prev, 1143 (void *) ext->ext_next); 1144 } 1145 meta_sp_debug("\n"); 1146 } 1147 1148 /* 1149 * FUNCTION: meta_sp_list_overlaps() 1150 * INPUT: head - the head of the list, must be NULL for empty list 1151 * OUTPUT: none 1152 * RETURNS: int - 1 if extents overlap, 0 if ok 1153 * PURPOSE: checks a list for overlaps. The list MUST be sorted by 1154 * offset for this function to work properly. 1155 */ 1156 static int 1157 meta_sp_list_overlaps(sp_ext_node_t *head) 1158 { 1159 sp_ext_node_t *ext; 1160 1161 for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) { 1162 if (ext->ext_offset + ext->ext_length > 1163 ext->ext_next->ext_offset) 1164 return (1); 1165 } 1166 return (0); 1167 } 1168 1169 /* 1170 * ************************************************************************** 1171 * Extent Allocation Functions * 1172 * ************************************************************************** 1173 */ 1174 1175 /* 1176 * FUNCTION: meta_sp_alloc_by_ext() 1177 * INPUT: sp - the set name for the device the node belongs to 1178 * np - the name of the device the node belongs to 1179 * head - the head of the list, must be NULL for empty list 1180 * free_ext - the free extent being allocated from 1181 * alloc_offset - the offset of the allocation 1182 * alloc_len - the length of the allocation 1183 * seq - the sequence number of the allocation 1184 * OUTPUT: head - the new head pointer 1185 * RETURNS: void 1186 * PURPOSE: allocates a portion of the free extent free_ext. The 1187 * allocated portion starts at alloc_offset and is 1188 * alloc_length long. Both (alloc_offset) and (alloc_offset + 1189 * alloc_length) must be contained within the free extent. 
1190 * 1191 * The free extent is split into as many as 3 pieces - a 1192 * free extent containing [ free_offset .. alloc_offset ), an 1193 * allocated extent containing the range [ alloc_offset .. 1194 * alloc_end ], and another free extent containing the 1195 * range ( alloc_end .. free_end ]. If either of the two 1196 * new free extents would be zero length, they are not created. 1197 * 1198 * Finally, the original free extent is removed. All newly 1199 * created extents have the EXTFLG_UPDATE flag set. 1200 */ 1201 static void 1202 meta_sp_alloc_by_ext( 1203 mdsetname_t *sp, 1204 mdname_t *np, 1205 sp_ext_node_t **head, 1206 sp_ext_node_t *free_ext, 1207 sp_ext_offset_t alloc_offset, 1208 sp_ext_length_t alloc_length, 1209 uint_t seq 1210 ) 1211 { 1212 sp_ext_offset_t free_offset = free_ext->ext_offset; 1213 sp_ext_length_t free_length = free_ext->ext_length; 1214 1215 sp_ext_offset_t alloc_end = alloc_offset + alloc_length; 1216 sp_ext_offset_t free_end = free_offset + free_length; 1217 1218 /* allocated extent must be a subset of the free extent */ 1219 assert(free_offset <= alloc_offset); 1220 assert(free_end >= alloc_end); 1221 1222 meta_sp_list_remove(head, free_ext); 1223 1224 if (free_offset < alloc_offset) { 1225 meta_sp_list_insert(NULL, NULL, head, free_offset, 1226 (alloc_offset - free_offset), EXTTYP_FREE, 0, 1227 EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1228 } 1229 1230 if (free_end > alloc_end) { 1231 meta_sp_list_insert(NULL, NULL, head, alloc_end, 1232 (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE, 1233 meta_sp_cmp_by_offset); 1234 } 1235 1236 meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length, 1237 EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 1238 1239 if (getenv(META_SP_DEBUG)) { 1240 meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n"); 1241 meta_sp_list_dump(*head); 1242 } 1243 } 1244 1245 /* 1246 * FUNCTION: meta_sp_alloc_by_len() 1247 * INPUT: sp - the set name for the device the node belongs to 1248 * np - the 
name of the device the node belongs to
 *		head	- the head of the list, must be NULL for empty list
 *		*lp	- the requested length to allocate
 *		last_off - the last offset already allocated.
 *		alignment - the desired extent alignment
 * OUTPUT:	head	- the new head pointer
 *		*lp	- the length allocated
 * RETURNS:	int	- -1 if error, the number of new extents on success
 * PURPOSE:	allocates extents from free space to satisfy the requested
 *		length. If requested length is zero, allocates all
 *		remaining free space. This function provides the meat
 *		of the extent allocation algorithm. Allocation is a
 *		three tier process:
 *
 *		1. If last_off is nonzero and there is free space following
 *		   that node, then it is extended to allocate as much of that
 *		   free space as possible. This is useful for metattach.
 *		2. If a free extent can be found to satisfy the remaining
 *		   requested space, then satisfy the rest of the request
 *		   from that extent.
 *		3. Start allocating space from any remaining free extents until
 *		   the remainder of the request is satisfied.
 *
 *		If alignment is non-zero, then every extent modified
 *		or newly allocated will be aligned modulo alignment,
 *		with a length that is an integer multiple of
 *		alignment.
 *
 *		The EXTFLG_UPDATE flag is set for all nodes (free and
 *		allocated) that require updated watermarks.
 *
 *		This algorithm may have a negative impact on fragmentation
 *		in pathological cases and may be improved if it turns out
 *		to be a problem. This may be exacerbated by particularly
 *		large alignments.
 *
 * NOTE:	It's confusing, so it demands an explanation:
 *		- len is used to represent requested data space; it
 *		  does not include room for a watermark.
On each full 1287 * or partial allocation, len will be decremented by 1288 * alloc_len (see next paragraph) until it reaches 1289 * zero. 1290 * - alloc_len is used to represent data space allocated 1291 * from a particular extent; it does not include space 1292 * for a watermark. In the rare event that a_length 1293 * (see next paragraph) is equal to MD_SP_WMSIZE, 1294 * alloc_len will be zero and the resulting MD_SP_WMSIZE 1295 * fragment of space will be utterly unusable. 1296 * - a_length is used to represent all space to be 1297 * allocated from a particular extent; it DOES include 1298 * space for a watermark. 1299 */ 1300 static int 1301 meta_sp_alloc_by_len( 1302 mdsetname_t *sp, 1303 mdname_t *np, 1304 sp_ext_node_t **head, 1305 sp_ext_length_t *lp, 1306 sp_ext_offset_t last_off, 1307 sp_ext_offset_t alignment 1308 ) 1309 { 1310 sp_ext_node_t *free_ext; 1311 sp_ext_node_t *alloc_ext; 1312 uint_t last_seq = 0; 1313 uint_t numexts = 0; 1314 sp_ext_length_t freespace; 1315 sp_ext_length_t alloc_len; 1316 sp_ext_length_t len; 1317 1318 /* We're DOA if we can't read *lp */ 1319 assert(lp != NULL); 1320 len = *lp; 1321 1322 /* 1323 * Process the nominal case first: we've been given an actual 1324 * size argument, rather than the literal "all" 1325 */ 1326 1327 if (len != 0) { 1328 1329 /* 1330 * Short circuit the check for free space. This may 1331 * tell us we have enough space when we really don't 1332 * because each extent loses space to a watermark, but 1333 * it will always tell us there isn't enough space 1334 * correctly. Worst case we do some extra work. 1335 */ 1336 freespace = meta_sp_list_size(*head, EXTTYP_FREE, 1337 INCLUDE_WM); 1338 1339 if (freespace < len) 1340 return (-1); 1341 1342 /* 1343 * First see if we can extend the last extent for an 1344 * attach. 
1345 */ 1346 if (last_off != 0LL) { 1347 int align = 0; 1348 1349 alloc_ext = 1350 meta_sp_list_find(*head, last_off); 1351 assert(alloc_ext != NULL); 1352 1353 /* 1354 * The offset test reflects the 1355 * inclusion of the watermark in the extent 1356 */ 1357 align = (alignment > 0) && 1358 (((alloc_ext->ext_offset + MD_SP_WMSIZE) % 1359 alignment) == 0); 1360 1361 /* 1362 * If we decided not to align here, we should 1363 * also reset "alignment" so we don't bother 1364 * later, either. 1365 */ 1366 if (!align) { 1367 alignment = 0; 1368 } 1369 1370 last_seq = alloc_ext->ext_seq; 1371 1372 free_ext = meta_sp_list_find(*head, 1373 alloc_ext->ext_offset + 1374 alloc_ext->ext_length); 1375 1376 /* 1377 * If a free extent follows our last allocated 1378 * extent, then remove the last allocated 1379 * extent and increase the size of the free 1380 * extent to overlap it, then allocate the 1381 * total space from the new free extent. 1382 */ 1383 if (free_ext != NULL && 1384 free_ext->ext_type == EXTTYP_FREE) { 1385 assert(free_ext->ext_offset == 1386 alloc_ext->ext_offset + 1387 alloc_ext->ext_length); 1388 1389 alloc_len = 1390 MIN(len, free_ext->ext_length); 1391 1392 if (align && (alloc_len < len)) { 1393 /* No watermark space needed */ 1394 alloc_len -= alloc_len % alignment; 1395 } 1396 1397 if (alloc_len > 0) { 1398 free_ext->ext_offset -= 1399 alloc_ext->ext_length; 1400 free_ext->ext_length += 1401 alloc_ext->ext_length; 1402 1403 meta_sp_alloc_by_ext(sp, np, head, 1404 free_ext, free_ext->ext_offset, 1405 alloc_ext->ext_length + alloc_len, 1406 last_seq); 1407 1408 /* 1409 * now remove the original allocated 1410 * node. We may have overlapping 1411 * extents for a short time before 1412 * this node is removed. 1413 */ 1414 meta_sp_list_remove(head, alloc_ext); 1415 len -= alloc_len; 1416 } 1417 } 1418 last_seq++; 1419 } 1420 1421 if (len == 0LL) 1422 goto out; 1423 1424 /* 1425 * Next, see if we can find a single allocation for 1426 * the remainder. 
This may make fragmentation worse 1427 * in some cases, but there's no good way to allocate 1428 * that doesn't have a highly fragmented corner case. 1429 */ 1430 for (free_ext = *head; free_ext != NULL; 1431 free_ext = free_ext->ext_next) { 1432 sp_ext_offset_t a_offset; 1433 sp_ext_offset_t a_length; 1434 1435 if (free_ext->ext_type != EXTTYP_FREE) 1436 continue; 1437 1438 /* 1439 * The length test should include space for 1440 * the watermark 1441 */ 1442 1443 a_offset = free_ext->ext_offset; 1444 a_length = free_ext->ext_length; 1445 1446 if (alignment > 0) { 1447 1448 /* 1449 * Shortcut for extents that have been 1450 * previously added to pad out the 1451 * data space 1452 */ 1453 if (a_length < alignment) { 1454 continue; 1455 } 1456 1457 /* 1458 * Round up so the data space begins 1459 * on a properly aligned boundary. 1460 */ 1461 a_offset += alignment - 1462 (a_offset % alignment) - MD_SP_WMSIZE; 1463 1464 /* 1465 * This is only necessary in case the 1466 * watermark size is ever greater than 1467 * one. It'll never happen, of 1468 * course; we'll get rid of watermarks 1469 * before we make 'em bigger. 1470 */ 1471 if (a_offset < free_ext->ext_offset) { 1472 a_offset += alignment; 1473 } 1474 1475 /* 1476 * Adjust the length to account for 1477 * the space lost above (if any) 1478 */ 1479 a_length -= 1480 (a_offset - free_ext->ext_offset); 1481 } 1482 1483 if (a_length >= len + MD_SP_WMSIZE) { 1484 meta_sp_alloc_by_ext(sp, np, head, 1485 free_ext, a_offset, 1486 len + MD_SP_WMSIZE, last_seq); 1487 1488 len = 0LL; 1489 numexts++; 1490 break; 1491 } 1492 } 1493 1494 if (len == 0LL) 1495 goto out; 1496 1497 1498 /* 1499 * If the request could not be satisfied by extending 1500 * the last extent or by a single extent, then put 1501 * multiple smaller extents together until the request 1502 * is satisfied. 
1503 */ 1504 for (free_ext = *head; (free_ext != NULL) && (len > 0); 1505 free_ext = free_ext->ext_next) { 1506 sp_ext_offset_t a_offset; 1507 sp_ext_length_t a_length; 1508 1509 if (free_ext->ext_type != EXTTYP_FREE) 1510 continue; 1511 1512 a_offset = free_ext->ext_offset; 1513 a_length = free_ext->ext_length; 1514 1515 if (alignment > 0) { 1516 1517 /* 1518 * Shortcut for extents that have been 1519 * previously added to pad out the 1520 * data space 1521 */ 1522 if (a_length < alignment) { 1523 continue; 1524 } 1525 1526 /* 1527 * Round up so the data space begins 1528 * on a properly aligned boundary. 1529 */ 1530 a_offset += alignment - 1531 (a_offset % alignment) - MD_SP_WMSIZE; 1532 1533 /* 1534 * This is only necessary in case the 1535 * watermark size is ever greater than 1536 * one. It'll never happen, of 1537 * course; we'll get rid of watermarks 1538 * before we make 'em bigger. 1539 */ 1540 if (a_offset < free_ext->ext_offset) { 1541 a_offset += alignment; 1542 } 1543 1544 /* 1545 * Adjust the length to account for 1546 * the space lost above (if any) 1547 */ 1548 a_length -= 1549 (a_offset - free_ext->ext_offset); 1550 1551 /* 1552 * Adjust the length to be properly 1553 * aligned if it is NOT to be the 1554 * last extent in the soft partition. 1555 */ 1556 if ((a_length - MD_SP_WMSIZE) < len) 1557 a_length -= 1558 (a_length - MD_SP_WMSIZE) 1559 % alignment; 1560 } 1561 1562 alloc_len = MIN(len, a_length - MD_SP_WMSIZE); 1563 if (alloc_len == 0) 1564 continue; 1565 1566 /* 1567 * meta_sp_alloc_by_ext() expects the 1568 * allocation length to include the watermark 1569 * size, which is why we don't simply pass in 1570 * alloc_len here. 1571 */ 1572 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1573 a_offset, MIN(len + MD_SP_WMSIZE, a_length), 1574 last_seq); 1575 1576 len -= alloc_len; 1577 numexts++; 1578 last_seq++; 1579 } 1580 1581 1582 /* 1583 * If there was not enough space we can throw it all 1584 * away since no real work has been done yet. 
1585 */ 1586 if (len != 0) { 1587 meta_sp_list_free(head); 1588 return (-1); 1589 } 1590 } 1591 1592 /* 1593 * Otherwise, the literal "all" was specified: allocate all 1594 * available free space. Don't bother with alignment. 1595 */ 1596 else { 1597 /* First, extend the last extent if this is a grow */ 1598 if (last_off != 0LL) { 1599 alloc_ext = 1600 meta_sp_list_find(*head, last_off); 1601 assert(alloc_ext != NULL); 1602 1603 last_seq = alloc_ext->ext_seq; 1604 1605 free_ext = meta_sp_list_find(*head, 1606 alloc_ext->ext_offset + 1607 alloc_ext->ext_length); 1608 1609 /* 1610 * If a free extent follows our last allocated 1611 * extent, then remove the last allocated 1612 * extent and increase the size of the free 1613 * extent to overlap it, then allocate the 1614 * total space from the new free extent. 1615 */ 1616 if (free_ext != NULL && 1617 free_ext->ext_type == EXTTYP_FREE) { 1618 assert(free_ext->ext_offset == 1619 alloc_ext->ext_offset + 1620 alloc_ext->ext_length); 1621 1622 len = alloc_len = 1623 free_ext->ext_length; 1624 1625 free_ext->ext_offset -= 1626 alloc_ext->ext_length; 1627 free_ext->ext_length += 1628 alloc_ext->ext_length; 1629 1630 meta_sp_alloc_by_ext(sp, np, head, 1631 free_ext, free_ext->ext_offset, 1632 alloc_ext->ext_length + alloc_len, 1633 last_seq); 1634 1635 /* 1636 * now remove the original allocated 1637 * node. We may have overlapping 1638 * extents for a short time before 1639 * this node is removed. 
1640 */ 1641 meta_sp_list_remove(head, alloc_ext); 1642 } 1643 1644 last_seq++; 1645 } 1646 1647 /* Next, grab all remaining free space */ 1648 for (free_ext = *head; free_ext != NULL; 1649 free_ext = free_ext->ext_next) { 1650 1651 if (free_ext->ext_type == EXTTYP_FREE) { 1652 alloc_len = 1653 free_ext->ext_length - MD_SP_WMSIZE; 1654 if (alloc_len == 0) 1655 continue; 1656 1657 /* 1658 * meta_sp_alloc_by_ext() expects the 1659 * allocation length to include the 1660 * watermark size, which is why we 1661 * don't simply pass in alloc_len 1662 * here. 1663 */ 1664 meta_sp_alloc_by_ext(sp, np, head, 1665 free_ext, free_ext->ext_offset, 1666 free_ext->ext_length, 1667 last_seq); 1668 1669 len += alloc_len; 1670 numexts++; 1671 last_seq++; 1672 } 1673 } 1674 } 1675 1676 out: 1677 if (getenv(META_SP_DEBUG)) { 1678 meta_sp_debug("meta_sp_alloc_by_len: Extent list after " 1679 "allocation:\n"); 1680 meta_sp_list_dump(*head); 1681 } 1682 1683 if (*lp == 0) { 1684 *lp = len; 1685 1686 /* 1687 * Make sure the callers hit a no space error if we 1688 * didn't actually find anything. 1689 */ 1690 if (len == 0) { 1691 return (-1); 1692 } 1693 } 1694 1695 return (numexts); 1696 } 1697 1698 /* 1699 * FUNCTION: meta_sp_alloc_by_list() 1700 * INPUT: sp - the set name for the device the node belongs to 1701 * np - the name of the device the node belongs to 1702 * head - the head of the list, must be NULL for empty list 1703 * oblist - an extent list containing requested nodes to allocate 1704 * OUTPUT: head - the new head pointer 1705 * RETURNS: int - -1 if error, the number of new extents on success 1706 * PURPOSE: allocates extents from free space to satisfy the requested 1707 * extent list. This is primarily used for the -o/-b options 1708 * where the user may specifically request extents to allocate. 1709 * Each extent in the oblist must be a subset (inclusive) of a 1710 * free extent and may not overlap each other. 
This 1711 * function sets the EXTFLG_UPDATE flag for each node that 1712 * requires a watermark update after allocating. 1713 */ 1714 static int 1715 meta_sp_alloc_by_list( 1716 mdsetname_t *sp, 1717 mdname_t *np, 1718 sp_ext_node_t **head, 1719 sp_ext_node_t *oblist 1720 ) 1721 { 1722 sp_ext_node_t *ext; 1723 sp_ext_node_t *free_ext; 1724 uint_t numexts = 0; 1725 1726 for (ext = oblist; ext != NULL; ext = ext->ext_next) { 1727 1728 free_ext = meta_sp_list_find(*head, 1729 ext->ext_offset - MD_SP_WMSIZE); 1730 1731 /* Make sure the allocation is within the free extent */ 1732 if ((free_ext == NULL) || 1733 (ext->ext_offset + ext->ext_length > 1734 free_ext->ext_offset + free_ext->ext_length) || 1735 (free_ext->ext_type != EXTTYP_FREE)) 1736 return (-1); 1737 1738 meta_sp_alloc_by_ext(sp, np, head, free_ext, 1739 ext->ext_offset - MD_SP_WMSIZE, 1740 ext->ext_length + MD_SP_WMSIZE, ext->ext_seq); 1741 1742 numexts++; 1743 } 1744 1745 assert(meta_sp_list_overlaps(*head) == 0); 1746 1747 if (getenv(META_SP_DEBUG)) { 1748 meta_sp_debug("meta_sp_alloc_by_list: Extent list after " 1749 "allocation:\n"); 1750 meta_sp_list_dump(*head); 1751 } 1752 1753 return (numexts); 1754 } 1755 1756 /* 1757 * ************************************************************************** 1758 * Extent List Population Functions * 1759 * ************************************************************************** 1760 */ 1761 1762 /* 1763 * FUNCTION: meta_sp_extlist_from_namelist() 1764 * INPUT: sp - the set name for the device the node belongs to 1765 * spnplp - the namelist of soft partitions to build a list from 1766 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1767 * ep - return error pointer 1768 * RETURNS: int - -1 if error, 0 on success 1769 * PURPOSE: builds an extent list representing the soft partitions 1770 * specified in the namelist. Each extent in each soft 1771 * partition is added to the list with the type EXTTYP_ALLOC. 
1772 * The EXTFLG_UPDATE flag is not set on any nodes. Each 1773 * extent in the list includes the space occupied by the 1774 * watermark, which is not included in the unit structures. 1775 */ 1776 static int 1777 meta_sp_extlist_from_namelist( 1778 mdsetname_t *sp, 1779 mdnamelist_t *spnlp, 1780 sp_ext_node_t **extlist, 1781 md_error_t *ep 1782 ) 1783 { 1784 int extn; 1785 md_sp_t *msp; /* unit structure of the sp's */ 1786 mdnamelist_t *namep; 1787 1788 assert(sp != NULL); 1789 1790 /* 1791 * Now go through the soft partitions and add a node to the used 1792 * list for each allocated extent. 1793 */ 1794 for (namep = spnlp; namep != NULL; namep = namep->next) { 1795 mdname_t *curnp = namep->namep; 1796 1797 /* get the unit structure */ 1798 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 1799 return (-1); 1800 1801 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1802 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1803 1804 /* 1805 * subtract from offset and add to the length 1806 * to account for the watermark, which is not 1807 * contained in the extents in the unit structure. 1808 */ 1809 meta_sp_list_insert(sp, curnp, extlist, 1810 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 1811 EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset); 1812 } 1813 } 1814 return (0); 1815 } 1816 1817 /* 1818 * FUNCTION: meta_sp_extlist_from_wm() 1819 * INPUT: sp - the set name for the device the node belongs to 1820 * compnp - the name of the device to scan watermarks on 1821 * OUTPUT: extlist - the extent list built from the SPs in the namelist 1822 * ep - return error pointer 1823 * RETURNS: int - -1 if error, 0 on success 1824 * PURPOSE: builds an extent list representing the soft partitions 1825 * specified in the namelist. Each extent in each soft 1826 * partition is added to the list with the type EXTTYP_ALLOC. 1827 * The EXTFLG_UPDATE flag is not set on any nodes. 
Each 1828 * extent in the list includes the space occupied by the 1829 * watermark, which is not included in the unit structures. 1830 */ 1831 static int 1832 meta_sp_extlist_from_wm( 1833 mdsetname_t *sp, 1834 mdname_t *compnp, 1835 sp_ext_node_t **extlist, 1836 ext_cmpfunc_t compare, 1837 md_error_t *ep 1838 ) 1839 { 1840 mp_watermark_t wm; 1841 mdname_t *np = NULL; 1842 mdsetname_t *spsetp = NULL; 1843 sp_ext_offset_t cur_off; 1844 1845 if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR) 1846 return (-1); 1847 1848 for (;;) { 1849 if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) { 1850 return (-1); 1851 } 1852 1853 /* get the set and name pointers */ 1854 if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) { 1855 if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) { 1856 return (-1); 1857 } 1858 } 1859 1860 if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) { 1861 if (meta_init_make_device(&sp, wm.wm_mdname, ep) != 0) 1862 return (-1); 1863 np = metaname(&spsetp, wm.wm_mdname, ep); 1864 if (np == NULL) { 1865 return (-1); 1866 } 1867 } 1868 1869 /* insert watermark into extent list */ 1870 meta_sp_list_insert(spsetp, np, extlist, cur_off, 1871 wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq, 1872 EXTFLG_UPDATE, compare); 1873 1874 /* if we see the end watermark, we're done */ 1875 if (wm.wm_type == EXTTYP_END) 1876 break; 1877 1878 cur_off += wm.wm_length + 1; 1879 1880 /* clear out set and name pointers for next iteration */ 1881 np = NULL; 1882 spsetp = NULL; 1883 } 1884 1885 return (0); 1886 } 1887 1888 /* 1889 * ************************************************************************** 1890 * Print (metastat) Functions * 1891 * ************************************************************************** 1892 */ 1893 1894 /* 1895 * FUNCTION: meta_sp_short_print() 1896 * INPUT: msp - the unit structure to display 1897 * fp - the file pointer to send output to 1898 * options - print options from the command line processor 1899 * 
OUTPUT: ep - return error pointer 1900 * RETURNS: int - -1 if error, 0 on success 1901 * PURPOSE: display a short report of the soft partition in md.tab 1902 * form, primarily used for metastat -p. 1903 */ 1904 static int 1905 meta_sp_short_print( 1906 md_sp_t *msp, 1907 char *fname, 1908 FILE *fp, 1909 mdprtopts_t options, 1910 md_error_t *ep 1911 ) 1912 { 1913 int extn; 1914 1915 if (options & PRINT_LARGEDEVICES) { 1916 if (msp->common.revision != MD_64BIT_META_DEV) 1917 return (0); 1918 } 1919 1920 /* print name and -p */ 1921 if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF) 1922 return (mdsyserror(ep, errno, fname)); 1923 1924 /* print the component */ 1925 /* 1926 * If the path is our standard /dev/rdsk or /dev/md/rdsk 1927 * then just print out the cxtxdxsx or the dx, metainit 1928 * will assume the default, otherwise we need the full 1929 * pathname to make sure this works as we intend. 1930 */ 1931 if ((strstr(msp->compnamep->rname, "/dev/rdsk") == NULL) && 1932 (strstr(msp->compnamep->rname, "/dev/md/rdsk") == NULL) && 1933 (strstr(msp->compnamep->rname, "/dev/td/") == NULL)) { 1934 /* not standard path so print full pathname */ 1935 if (fprintf(fp, " %s", msp->compnamep->rname) == EOF) 1936 return (mdsyserror(ep, errno, fname)); 1937 } else { 1938 /* standard path so print ctds or d number */ 1939 if (fprintf(fp, " %s", msp->compnamep->cname) == EOF) 1940 return (mdsyserror(ep, errno, fname)); 1941 } 1942 1943 /* print out each extent */ 1944 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 1945 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 1946 if (fprintf(fp, " -o %llu -b %llu ", extp->poff, 1947 extp->len) == EOF) 1948 return (mdsyserror(ep, errno, fname)); 1949 } 1950 1951 if (fprintf(fp, "\n") == EOF) 1952 return (mdsyserror(ep, errno, fname)); 1953 1954 /* success */ 1955 return (0); 1956 } 1957 1958 /* 1959 * FUNCTION: meta_sp_status_to_name() 1960 * INPUT: xsp_status - the status value to convert to a string 1961 * tstate - transient 
errored device state. If set the 1962 * device is Unavailable 1963 * OUTPUT: none 1964 * RETURNS: char * - a pointer to the string representing the status value 1965 * PURPOSE: return an internationalized string representing the 1966 * status value for a soft partition. The strings are 1967 * strdup'd and must be freed by the caller. 1968 */ 1969 static char * 1970 meta_sp_status_to_name( 1971 xsp_status_t xsp_status, 1972 uint_t tstate 1973 ) 1974 { 1975 char *rval = NULL; 1976 1977 /* 1978 * Check to see if we have MD_INACCESSIBLE set. This is the only valid 1979 * value for an 'Unavailable' return. tstate can be set because of 1980 * other multi-node reasons (e.g. ABR being set) 1981 */ 1982 if (tstate & MD_INACCESSIBLE) { 1983 return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable"))); 1984 } 1985 1986 switch (xsp_status) { 1987 case MD_SP_CREATEPEND: 1988 rval = Strdup(dgettext(TEXT_DOMAIN, "Creating")); 1989 break; 1990 case MD_SP_GROWPEND: 1991 rval = Strdup(dgettext(TEXT_DOMAIN, "Growing")); 1992 break; 1993 case MD_SP_DELPEND: 1994 rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting")); 1995 break; 1996 case MD_SP_OK: 1997 rval = Strdup(dgettext(TEXT_DOMAIN, "Okay")); 1998 break; 1999 case MD_SP_ERR: 2000 rval = Strdup(dgettext(TEXT_DOMAIN, "Errored")); 2001 break; 2002 case MD_SP_RECOVER: 2003 rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering")); 2004 break; 2005 } 2006 2007 if (rval == NULL) 2008 rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid")); 2009 2010 return (rval); 2011 } 2012 2013 /* 2014 * FUNCTION: meta_sp_report() 2015 * INPUT: sp - the set name for the unit being displayed 2016 * msp - the unit structure to display 2017 * nlpp - pass back the large devs 2018 * fp - the file pointer to send output to 2019 * options - print options from the command line processor 2020 * OUTPUT: ep - return error pointer 2021 * RETURNS: int - -1 if error, 0 on success 2022 * PURPOSE: print a full report of the device specified 2023 */ 2024 static int 2025 meta_sp_report( 2026 
mdsetname_t *sp, 2027 md_sp_t *msp, 2028 mdnamelist_t **nlpp, 2029 char *fname, 2030 FILE *fp, 2031 mdprtopts_t options, 2032 md_error_t *ep 2033 ) 2034 { 2035 uint_t extn; 2036 char *status; 2037 char *devid = ""; 2038 mdname_t *didnp = NULL; 2039 ddi_devid_t dtp; 2040 int len; 2041 uint_t tstate = 0; 2042 2043 if (options & PRINT_LARGEDEVICES) { 2044 if (msp->common.revision != MD_64BIT_META_DEV) { 2045 return (0); 2046 } else { 2047 if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0) 2048 return (-1); 2049 } 2050 } 2051 2052 if (options & PRINT_HEADER) { 2053 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"), 2054 msp->common.namep->cname) == EOF) 2055 return (mdsyserror(ep, errno, fname)); 2056 } 2057 2058 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"), 2059 msp->compnamep->cname) == EOF) 2060 return (mdsyserror(ep, errno, fname)); 2061 2062 /* Determine if device is available before displaying status */ 2063 if (metaismeta(msp->common.namep)) { 2064 if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0) 2065 return (-1); 2066 } 2067 status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED); 2068 2069 /* print out "State" to be consistent with other metadevices */ 2070 if (tstate & MD_ABR_CAP) { 2071 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2072 " State: %s - Application Based Recovery (ABR)\n"), 2073 status) == EOF) { 2074 Free(status); 2075 return (mdsyserror(ep, errno, fname)); 2076 } 2077 } else { 2078 if (fprintf(fp, dgettext(TEXT_DOMAIN, 2079 " State: %s\n"), status) == EOF) { 2080 Free(status); 2081 return (mdsyserror(ep, errno, fname)); 2082 } 2083 } 2084 free(status); 2085 2086 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"), 2087 msp->common.size, 2088 meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF) 2089 return (mdsyserror(ep, errno, fname)); 2090 2091 /* print component details */ 2092 if (! 
metaismeta(msp->compnamep)) { 2093 diskaddr_t start_blk; 2094 int has_mddb; 2095 char *has_mddb_str; 2096 2097 /* print header */ 2098 /* 2099 * Building a format string on the fly that will 2100 * be used in (f)printf. This allows the length 2101 * of the ctd to vary from small to large without 2102 * looking horrible. 2103 */ 2104 len = strlen(msp->compnamep->cname); 2105 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 2106 len += 2; 2107 if (fprintf(fp, 2108 "\t%-*.*s %-12.12s %-5.5s %s\n", 2109 len, len, 2110 dgettext(TEXT_DOMAIN, "Device"), 2111 dgettext(TEXT_DOMAIN, "Start Block"), 2112 dgettext(TEXT_DOMAIN, "Dbase"), 2113 dgettext(TEXT_DOMAIN, "Reloc")) == EOF) { 2114 return (mdsyserror(ep, errno, fname)); 2115 } 2116 2117 2118 /* get info */ 2119 if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) == 2120 MD_DISKADDR_ERROR) 2121 return (-1); 2122 2123 if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0) 2124 return (-1); 2125 2126 if (has_mddb) 2127 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 2128 else 2129 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 2130 2131 /* populate the key in the name_p structure */ 2132 didnp = metadevname(&sp, msp->compnamep->dev, ep); 2133 if (didnp == NULL) { 2134 return (-1); 2135 } 2136 2137 /* determine if devid does NOT exist */ 2138 if (options & PRINT_DEVID) { 2139 if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep), 2140 didnp->key, ep)) == NULL) 2141 devid = dgettext(TEXT_DOMAIN, "No "); 2142 else { 2143 devid = dgettext(TEXT_DOMAIN, "Yes"); 2144 free(dtp); 2145 } 2146 } 2147 2148 /* print info */ 2149 /* 2150 * This allows the length 2151 * of the ctd to vary from small to large without 2152 * looking horrible. 
2153 */ 2154 if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n", 2155 len, msp->compnamep->cname, 2156 start_blk, has_mddb_str, devid) == EOF) { 2157 return (mdsyserror(ep, errno, fname)); 2158 } 2159 (void) fprintf(fp, "\n"); 2160 } 2161 2162 2163 /* print the headers */ 2164 if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n", 2165 dgettext(TEXT_DOMAIN, "Extent"), 2166 dgettext(TEXT_DOMAIN, "Start Block"), 2167 dgettext(TEXT_DOMAIN, "Block count")) == EOF) 2168 return (mdsyserror(ep, errno, fname)); 2169 2170 /* print out each extent */ 2171 for (extn = 0; (extn < msp->ext.ext_len); extn++) { 2172 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 2173 2174 /* If PRINT_TIMES option is ever supported, add output here */ 2175 if (fprintf(fp, "\t%6u %24llu %24llu\n", 2176 extn, extp->poff, extp->len) == EOF) 2177 return (mdsyserror(ep, errno, fname)); 2178 } 2179 2180 /* separate records with a newline */ 2181 (void) fprintf(fp, "\n"); 2182 return (0); 2183 } 2184 2185 /* 2186 * FUNCTION: meta_sp_print() 2187 * INPUT: sp - the set name for the unit being displayed 2188 * np - the name of the device to print 2189 * fname - ??? not used 2190 * fp - the file pointer to send output to 2191 * options - print options from the command line processor 2192 * OUTPUT: ep - return error pointer 2193 * RETURNS: int - -1 if error, 0 on success 2194 * PURPOSE: print a full report of the device specified by metastat. 2195 * This is the main entry point for printing. 
2196 */ 2197 int 2198 meta_sp_print( 2199 mdsetname_t *sp, 2200 mdname_t *np, 2201 mdnamelist_t **nlpp, 2202 char *fname, 2203 FILE *fp, 2204 mdprtopts_t options, 2205 md_error_t *ep 2206 ) 2207 { 2208 md_sp_t *msp; 2209 md_unit_t *mdp; 2210 int rval = 0; 2211 2212 /* should always have the same set */ 2213 assert(sp != NULL); 2214 2215 /* print all the soft partitions */ 2216 if (np == NULL) { 2217 mdnamelist_t *nlp = NULL; 2218 mdnamelist_t *p; 2219 int cnt; 2220 2221 if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0) 2222 return (-1); 2223 else if (cnt == 0) 2224 return (0); 2225 2226 /* recusively print them out */ 2227 for (p = nlp; (p != NULL); p = p->next) { 2228 mdname_t *curnp = p->namep; 2229 2230 /* 2231 * one problem with the rval of -1 here is that 2232 * the error gets "lost" when the next device is 2233 * printed, but we want to print them all anyway. 2234 */ 2235 rval = meta_sp_print(sp, curnp, nlpp, fname, fp, 2236 options, ep); 2237 } 2238 2239 /* clean up, return success */ 2240 metafreenamelist(nlp); 2241 return (rval); 2242 } 2243 2244 /* get the unit structure */ 2245 if ((msp = meta_get_sp_common(sp, np, 2246 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 2247 return (-1); 2248 2249 /* check for parented */ 2250 if ((! (options & PRINT_SUBDEVS)) && 2251 (MD_HAS_PARENT(msp->common.parent))) { 2252 return (0); 2253 } 2254 2255 /* print appropriate detail */ 2256 if (options & PRINT_SHORT) { 2257 if (meta_sp_short_print(msp, fname, fp, options, ep) != 0) 2258 return (-1); 2259 } else { 2260 if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0) 2261 return (-1); 2262 } 2263 2264 /* 2265 * Print underlying metadevices if they are parented to us and 2266 * if the info for the underlying metadevice has not been printed. 
2267 */ 2268 if (metaismeta(msp->compnamep)) { 2269 /* get the unit structure for the subdevice */ 2270 if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL) 2271 return (-1); 2272 2273 /* If info not already printed, recurse */ 2274 if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) { 2275 if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp, 2276 (options | PRINT_HEADER | PRINT_SUBDEVS), 2277 NULL, ep) != 0) { 2278 return (-1); 2279 } 2280 BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp))); 2281 } 2282 } 2283 return (0); 2284 } 2285 2286 /* 2287 * ************************************************************************** 2288 * Watermark Manipulation Functions * 2289 * ************************************************************************** 2290 */ 2291 2292 /* 2293 * FUNCTION: meta_sp_get_start() 2294 * INPUT: sp - the operating set 2295 * np - device upon which the sp is being built 2296 * OUTPUT: ep - return error pointer 2297 * RETURNS: daddr_t - -1 if error, otherwise the start block 2298 * PURPOSE: Encapsulate the determination of the start block of the 2299 * device upon which the sp is built or being built. 2300 * This is done to hide the ugliness of the algorithm. In 2301 * the case where a sp is being built upon a stripe of > 1 2302 * TB that is made up of a set of disks in which the first 2303 * has a VTOC label the result returned from the call to 2304 * metagetstart is incorrect. The reason being that a > 1 2305 * TB metadevice will manufacture an EFI label in which the 2306 * start address is zero. This is irrespective of the underlying 2307 * devices. The long term fix for this is to fix 2308 * meta_efi_to_mdvtoc and meta_efi_to mdgeom so that they return 2309 * values that are indicative of the first underlying device in 2310 * metadevice. 
2311 */ 2312 static diskaddr_t 2313 meta_sp_get_start( 2314 mdsetname_t *sp, 2315 mdname_t *np, 2316 md_error_t *ep 2317 ) 2318 { 2319 daddr_t start_block; 2320 2321 if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) { 2322 start_block += MD_SP_START; 2323 /* 2324 * In the case that the device upon which the sp is being 2325 * created is a metadevice then ensure that in the case that 2326 * the first underlying device has a vtoc label that it is 2327 * not overwritten with a watermark by setting the start block 2328 * to point just past the vtoc label 2329 */ 2330 if (start_block < VTOC_SIZE && metaismeta(np)) 2331 start_block = VTOC_SIZE; 2332 } 2333 2334 return (start_block); 2335 } 2336 2337 /* 2338 * FUNCTION: meta_sp_update_wm() 2339 * INPUT: sp - the operating set 2340 * msp - a pointer to the XDR unit structure 2341 * extlist - the extent list specifying watermarks to update 2342 * OUTPUT: ep - return error pointer 2343 * RETURNS: int - -1 if error, 0 on success 2344 * PURPOSE: steps backwards through the extent list updating 2345 * watermarks for all extents with the EXTFLG_UPDATE flag 2346 * set. Writing the watermarks guarantees consistency when 2347 * extents must be broken into pieces since the original 2348 * watermark will be the last to be updated, and will be 2349 * changed to point to a new watermark that is already 2350 * known to be consistent. If one of the writes fails, the 2351 * original watermark stays intact and none of the changes 2352 * are realized. 
2353 */ 2354 static int 2355 meta_sp_update_wm( 2356 mdsetname_t *sp, 2357 md_sp_t *msp, 2358 sp_ext_node_t *extlist, 2359 md_error_t *ep 2360 ) 2361 { 2362 sp_ext_node_t *ext; 2363 sp_ext_node_t *tail; 2364 mp_watermark_t *wmp, *watermarks; 2365 xsp_offset_t *osp, *offsets; 2366 int update_count = 0; 2367 int rval = 0; 2368 md_unit_t *mdp; 2369 md_sp_update_wm_t update_params; 2370 2371 if (getenv(META_SP_DEBUG)) { 2372 meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n"); 2373 meta_sp_list_dump(extlist); 2374 } 2375 2376 /* 2377 * find the last node so we can write the watermarks backwards 2378 * and count watermarks to update so we can allocate space 2379 */ 2380 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 2381 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2382 update_count++; 2383 } 2384 2385 if (ext->ext_next == NULL) { 2386 tail = ext; 2387 } 2388 } 2389 ext = tail; 2390 2391 wmp = watermarks = 2392 Zalloc(update_count * sizeof (mp_watermark_t)); 2393 osp = offsets = 2394 Zalloc(update_count * sizeof (sp_ext_offset_t)); 2395 2396 while (ext != NULL) { 2397 if ((ext->ext_flags & EXTFLG_UPDATE) != 0) { 2398 /* update watermark */ 2399 wmp->wm_magic = MD_SP_MAGIC; 2400 wmp->wm_version = MD_SP_VERSION; 2401 wmp->wm_type = ext->ext_type; 2402 wmp->wm_seq = ext->ext_seq; 2403 wmp->wm_length = ext->ext_length - MD_SP_WMSIZE; 2404 2405 /* fill in the volume name and set name */ 2406 if (ext->ext_namep != NULL) 2407 (void) strcpy(wmp->wm_mdname, 2408 ext->ext_namep->cname); 2409 else 2410 (void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME); 2411 if (ext->ext_setp != NULL && 2412 ext->ext_setp->setno != MD_LOCAL_SET) 2413 (void) strcpy(wmp->wm_setname, 2414 ext->ext_setp->setname); 2415 else 2416 (void) strcpy(wmp->wm_setname, 2417 MD_SP_LOCALSETNAME); 2418 2419 /* Generate the checksum */ 2420 wmp->wm_checksum = 0; 2421 crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum, 2422 sizeof (*wmp), NULL); 2423 2424 /* record the extent offset */ 2425 *osp = 
ext->ext_offset; 2426 2427 /* Advance the placeholders */ 2428 osp++; wmp++; 2429 } 2430 ext = ext->ext_prev; 2431 } 2432 2433 mdp = meta_get_mdunit(sp, msp->common.namep, ep); 2434 if (mdp == NULL) { 2435 rval = -1; 2436 goto out; 2437 } 2438 2439 (void) memset(&update_params, 0, sizeof (update_params)); 2440 update_params.mnum = MD_SID(mdp); 2441 update_params.count = update_count; 2442 update_params.wmp = (uintptr_t)watermarks; 2443 update_params.osp = (uintptr_t)offsets; 2444 MD_SETDRIVERNAME(&update_params, MD_SP, 2445 MD_MIN2SET(update_params.mnum)); 2446 2447 if (metaioctl(MD_IOC_SPUPDATEWM, &update_params, 2448 &update_params.mde, msp->common.namep->cname) != 0) { 2449 (void) mdstealerror(ep, &update_params.mde); 2450 rval = -1; 2451 goto out; 2452 } 2453 2454 out: 2455 Free(watermarks); 2456 Free(offsets); 2457 2458 return (rval); 2459 } 2460 2461 /* 2462 * FUNCTION: meta_sp_clear_wm() 2463 * INPUT: sp - the operating set 2464 * msp - the unit structure for the soft partition to clear 2465 * OUTPUT: ep - return error pointer 2466 * RETURNS: int - -1 if error, 0 on success 2467 * PURPOSE: steps through the extents for a soft partition unit and 2468 * creates an extent list designed to mark all of the 2469 * watermarks for those extents as free. The extent list 2470 * is then passed to meta_sp_update_wm() to actually write 2471 * the watermarks out. 
2472 */ 2473 static int 2474 meta_sp_clear_wm( 2475 mdsetname_t *sp, 2476 md_sp_t *msp, 2477 md_error_t *ep 2478 ) 2479 { 2480 sp_ext_node_t *extlist = NULL; 2481 int numexts = msp->ext.ext_len; 2482 uint_t i; 2483 int rval = 0; 2484 2485 /* for each watermark must set the flag to SP_FREE */ 2486 for (i = 0; i < numexts; i++) { 2487 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 2488 2489 meta_sp_list_insert(NULL, NULL, &extlist, 2490 extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE, 2491 EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 2492 } 2493 2494 /* update watermarks */ 2495 rval = meta_sp_update_wm(sp, msp, extlist, ep); 2496 2497 meta_sp_list_free(&extlist); 2498 return (rval); 2499 } 2500 2501 /* 2502 * FUNCTION: meta_sp_read_wm() 2503 * INPUT: sp - setname for component 2504 * compnp - mdname_t for component 2505 * offset - the offset of the watermark to read (sectors) 2506 * OUTPUT: wm - the watermark structure to read into 2507 * ep - return error pointer 2508 * RETURNS: int - -1 if error, 0 on success 2509 * PURPOSE: seeks out to the requested offset and reads a watermark. 2510 * It then verifies that the magic number is correct and 2511 * that the checksum is valid, returning an error if either 2512 * is wrong. 2513 */ 2514 static int 2515 meta_sp_read_wm( 2516 mdsetname_t *sp, 2517 mdname_t *compnp, 2518 mp_watermark_t *wm, 2519 sp_ext_offset_t offset, 2520 md_error_t *ep 2521 ) 2522 { 2523 md_sp_read_wm_t read_params; 2524 2525 /* 2526 * make sure block offset does not overflow 2^64 bytes and it's a 2527 * multiple of the block size. 
2528 */ 2529 assert(offset <= (1LL << (64 - DEV_BSHIFT))); 2530 /* LINTED */ 2531 assert((sizeof (*wm) % DEV_BSIZE) == 0); 2532 2533 (void) memset(wm, 0, sizeof (*wm)); 2534 2535 (void) memset(&read_params, 0, sizeof (read_params)); 2536 read_params.rdev = compnp->dev; 2537 read_params.wmp = (uintptr_t)wm; 2538 read_params.offset = offset; 2539 MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno); 2540 2541 if (metaioctl(MD_IOC_SPREADWM, &read_params, 2542 &read_params.mde, compnp->cname) != 0) { 2543 2544 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2545 "Extent header read failed, block %llu.\n"), offset); 2546 return (mdstealerror(ep, &read_params.mde)); 2547 } 2548 2549 /* make sure magic number is correct */ 2550 if (wm->wm_magic != MD_SP_MAGIC) { 2551 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2552 "found incorrect magic number %x, expected %x.\n"), 2553 wm->wm_magic, MD_SP_MAGIC); 2554 /* 2555 * Pass NULL for the device name as we don't have 2556 * valid watermark contents. 2557 */ 2558 return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL)); 2559 } 2560 2561 if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, 2562 sizeof (*wm), NULL)) { 2563 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 2564 "found incorrect checksum %x.\n"), 2565 wm->wm_checksum); 2566 return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname)); 2567 } 2568 2569 return (0); 2570 } 2571 2572 /* 2573 * ************************************************************************** 2574 * Query Functions 2575 * ************************************************************************** 2576 */ 2577 2578 /* 2579 * IMPORTANT NOTE: This is a static function that assumes that 2580 * its input parameters have been checked and 2581 * have valid values that lie within acceptable 2582 * ranges. 
2583 * 2584 * FUNCTION: meta_sp_enough_space() 2585 * INPUT: desired_number_of_sps - the number of soft partitions desired; 2586 * must be > 0 2587 * desired_sp_size - the desired soft partition size in blocks; 2588 * must be > 0 2589 * extent_listpp - a reference to a reference to an extent 2590 * list that lists the extents on a device; 2591 * must be a reference to a reference to a 2592 * valid extent list 2593 * alignment - the desired data space alignment for the sp's 2594 * OUTPUT: boolean_t return value 2595 * RETURNS: boolean_t - B_TRUE if there's enough space in the extent 2596 * list to create the desired soft partitions, 2597 * B_FALSE if there's not enough space 2598 * PURPOSE: determines whether there's enough free space in an extent 2599 * list to allow creation of a set of soft partitions 2600 */ 2601 static boolean_t 2602 meta_sp_enough_space( 2603 int desired_number_of_sps, 2604 blkcnt_t desired_sp_size, 2605 sp_ext_node_t **extent_listpp, 2606 sp_ext_length_t alignment 2607 ) 2608 { 2609 boolean_t enough_space; 2610 int number_of_sps; 2611 int number_of_extents_used; 2612 sp_ext_length_t desired_ext_length = desired_sp_size; 2613 2614 enough_space = B_TRUE; 2615 number_of_sps = 0; 2616 while ((enough_space == B_TRUE) && 2617 (number_of_sps < desired_number_of_sps)) { 2618 /* 2619 * Use the extent allocation algorithm implemented by 2620 * meta_sp_alloc_by_len() to test whether the free 2621 * extents in the extent list referenced by *extent_listpp 2622 * contain enough space to accomodate a soft partition 2623 * of size desired_ext_length. 2624 * 2625 * Repeat the test <desired_number_of_sps> times 2626 * or until it fails, whichever comes first, 2627 * each time allocating the extents required to 2628 * create the soft partition without actually 2629 * creating the soft partition. 
2630 */ 2631 number_of_extents_used = meta_sp_alloc_by_len( 2632 TEST_SETNAMEP, 2633 TEST_SOFT_PARTITION_NAMEP, 2634 extent_listpp, 2635 &desired_ext_length, 2636 NO_OFFSET, 2637 alignment); 2638 if (number_of_extents_used == -1) { 2639 enough_space = B_FALSE; 2640 } else { 2641 number_of_sps++; 2642 } 2643 } 2644 return (enough_space); 2645 } 2646 2647 /* 2648 * IMPORTANT NOTE: This is a static function that calls other functions 2649 * that check its mdsetnamep and device_mdnamep 2650 * input parameters, but expects extent_listpp to 2651 * be a initialized to a valid address to which 2652 * it can write a reference to the extent list that 2653 * it creates. 2654 * 2655 * FUNCTION: meta_sp_get_extent_list() 2656 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2657 * for the set containing the device for 2658 * which the extents are to be listed 2659 * device_mdnamep - a reference to the mdname_t structure 2660 * for the device for which the extents 2661 * are to be listed 2662 * OUTPUT: *extent_listpp - a reference to the extent list for 2663 * the device; NULL if the function fails 2664 * *ep - the libmeta error encountered, if any 2665 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2666 * B_FALSE if not 2667 * PURPOSE: gets the extent list for a device 2668 */ 2669 static boolean_t 2670 meta_sp_get_extent_list( 2671 mdsetname_t *mdsetnamep, 2672 mdname_t *device_mdnamep, 2673 sp_ext_node_t **extent_listpp, 2674 md_error_t *ep 2675 ) 2676 { 2677 diskaddr_t device_size_in_blocks; 2678 mdnamelist_t *sp_name_listp; 2679 diskaddr_t start_block_address_in_blocks; 2680 2681 *extent_listpp = NULL; 2682 sp_name_listp = NULL; 2683 2684 start_block_address_in_blocks = meta_sp_get_start(mdsetnamep, 2685 device_mdnamep, 2686 ep); 2687 if (start_block_address_in_blocks == MD_DISKADDR_ERROR) { 2688 if (getenv(META_SP_DEBUG)) { 2689 mde_perror(ep, "meta_sp_get_extent_list:meta_sp_get_start"); 2690 } 2691 return (B_FALSE); 2692 } 2693 2694 
device_size_in_blocks = metagetsize(device_mdnamep, ep); 2695 if (device_size_in_blocks == MD_DISKADDR_ERROR) { 2696 if (getenv(META_SP_DEBUG)) { 2697 mde_perror(ep, 2698 "meta_sp_get_extent_list:metagetsize"); 2699 } 2700 return (B_FALSE); 2701 } 2702 2703 /* 2704 * Sanity check: the start block will have skipped an integer 2705 * number of cylinders, C. C will usually be zero. If (C > 0), 2706 * and the disk slice happens to only be C cylinders in total 2707 * size, we'll fail this check. 2708 */ 2709 if (device_size_in_blocks <= 2710 (start_block_address_in_blocks + MD_SP_WMSIZE)) { 2711 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname); 2712 return (B_FALSE); 2713 } 2714 2715 /* 2716 * After this point, we will have allocated resources, so any 2717 * failure returns must be through the supplied "fail" label 2718 * to properly deallocate things. 2719 */ 2720 2721 /* 2722 * Create an empty extent list that starts one watermark past 2723 * the start block of the device and ends one watermark before 2724 * the end of the device. 2725 */ 2726 meta_sp_list_insert(TEST_SETNAMEP, 2727 TEST_SOFT_PARTITION_NAMEP, 2728 extent_listpp, 2729 NO_OFFSET, 2730 (sp_ext_length_t)start_block_address_in_blocks, 2731 EXTTYP_RESERVED, 2732 NO_SEQUENCE_NUMBER, 2733 NO_FLAGS, 2734 meta_sp_cmp_by_offset); 2735 meta_sp_list_insert(TEST_SETNAMEP, 2736 TEST_SOFT_PARTITION_NAMEP, 2737 extent_listpp, 2738 (sp_ext_offset_t)(device_size_in_blocks - 2739 MD_SP_WMSIZE), 2740 MD_SP_WMSIZE, 2741 EXTTYP_END, 2742 NO_SEQUENCE_NUMBER, 2743 NO_FLAGS, 2744 meta_sp_cmp_by_offset); 2745 2746 /* 2747 * Get the list of soft partitions that are already on the 2748 * device. 
2749 */ 2750 if (meta_sp_get_by_component(mdsetnamep, device_mdnamep, 2751 &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) { 2752 if (getenv(META_SP_DEBUG)) { 2753 mde_perror(ep, 2754 "meta_sp_get_extent_list:meta_sp_get_by_component"); 2755 } 2756 goto fail; 2757 } 2758 2759 if (sp_name_listp != NULL) { 2760 /* 2761 * If there are soft partitions on the device, add the 2762 * extents used in them to the extent list. 2763 */ 2764 if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp, 2765 extent_listpp, ep) == -1) { 2766 if (getenv(META_SP_DEBUG)) { 2767 mde_perror(ep, "meta_sp_get_extent_list:" 2768 "meta_sp_extlist_from_namelist"); 2769 } 2770 goto fail; 2771 } 2772 metafreenamelist(sp_name_listp); 2773 } 2774 2775 /* 2776 * Add free extents to the extent list to represent 2777 * the remaining regions of free space on the 2778 * device. 2779 */ 2780 meta_sp_list_freefill(extent_listpp, device_size_in_blocks); 2781 return (B_TRUE); 2782 2783 fail: 2784 if (sp_name_listp != NULL) { 2785 metafreenamelist(sp_name_listp); 2786 } 2787 2788 if (*extent_listpp != NULL) { 2789 /* 2790 * meta_sp_list_free sets *extent_listpp to NULL. 2791 */ 2792 meta_sp_list_free(extent_listpp); 2793 } 2794 return (B_FALSE); 2795 } 2796 2797 /* 2798 * IMPORTANT NOTE: This is a static function that calls other functions 2799 * that check its mdsetnamep and mddrivenamep 2800 * input parameters, but expects extent_listpp to 2801 * be a initialized to a valid address to which 2802 * it can write a reference to the extent list that 2803 * it creates. 
2804 * 2805 * FUNCTION: meta_sp_get_extent_list_for_drive() 2806 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2807 * for the set containing the drive for 2808 * which the extents are to be listed 2809 * mddrivenamep - a reference to the mddrivename_t structure 2810 * for the drive for which the extents 2811 * are to be listed 2812 * OUTPUT: *extent_listpp - a reference to the extent list for 2813 * the drive; NULL if the function fails 2814 * RETURNS: boolean_t - B_TRUE if the function call was successful, 2815 * B_FALSE if not 2816 * PURPOSE: gets the extent list for a drive when the entire drive 2817 * is to be soft partitioned 2818 */ 2819 static boolean_t 2820 meta_sp_get_extent_list_for_drive( 2821 mdsetname_t *mdsetnamep, 2822 mddrivename_t *mddrivenamep, 2823 sp_ext_node_t **extent_listpp 2824 ) 2825 { 2826 boolean_t can_use; 2827 diskaddr_t free_space; 2828 md_error_t mderror; 2829 mdvtoc_t proposed_vtoc; 2830 int repartition_options; 2831 int return_value; 2832 md_sp_t test_sp_struct; 2833 2834 can_use = B_TRUE; 2835 *extent_listpp = NULL; 2836 mderror = mdnullerror; 2837 test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0, 2838 &mderror); 2839 if (test_sp_struct.compnamep == NULL) { 2840 can_use = B_FALSE; 2841 } 2842 2843 if (can_use == B_TRUE) { 2844 mderror = mdnullerror; 2845 repartition_options = 0; 2846 return_value = meta_check_sp(mdsetnamep, &test_sp_struct, 2847 MDCMD_USE_WHOLE_DISK, &repartition_options, 2848 &mderror); 2849 if (return_value != 0) { 2850 can_use = B_FALSE; 2851 } 2852 } 2853 2854 if (can_use == B_TRUE) { 2855 mderror = mdnullerror; 2856 repartition_options = repartition_options | 2857 (MD_REPART_FORCE | MD_REPART_DONT_LABEL); 2858 return_value = meta_repartition_drive(mdsetnamep, mddrivenamep, 2859 repartition_options, &proposed_vtoc, &mderror); 2860 if (return_value != 0) { 2861 can_use = B_FALSE; 2862 } 2863 } 2864 2865 if (can_use == B_TRUE) { 2866 free_space = 
proposed_vtoc.parts[MD_SLICE0].size; 2867 if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) { 2868 can_use = B_FALSE; 2869 } 2870 } 2871 2872 if (can_use == B_TRUE) { 2873 /* 2874 * Create an extent list that starts with 2875 * a reserved extent that ends at the start 2876 * of the usable space on slice zero of the 2877 * proposed VTOC, ends with an extent that 2878 * reserves space for a watermark at the end 2879 * of slice zero, and contains a single free 2880 * extent that occupies the rest of the space 2881 * on the slice. 2882 * 2883 * NOTE: 2884 * 2885 * Don't use metagetstart() or metagetsize() to 2886 * find the usable space. They query the mdname_t 2887 * structure that represents an actual device to 2888 * determine the amount of space on the device that 2889 * contains metadata and the total amount of space 2890 * on the device. Since this function creates a 2891 * proposed extent list that doesn't reflect the 2892 * state of an actual device, there's no mdname_t 2893 * structure to be queried. 2894 * 2895 * When a drive is reformatted to prepare for 2896 * soft partitioning, all of slice seven is 2897 * reserved for metadata, all of slice zero is 2898 * available for soft partitioning, and all other 2899 * slices on the drive are empty. The proposed 2900 * extent list for the drive therefore contains 2901 * only three extents: a reserved extent that ends 2902 * at the start of the usable space on slice zero, 2903 * a single free extent that occupies all the usable 2904 * space on slice zero, and an ending extent that 2905 * reserves space for a watermark at the end of 2906 * slice zero. 
2907 */ 2908 meta_sp_list_insert(TEST_SETNAMEP, 2909 TEST_SOFT_PARTITION_NAMEP, 2910 extent_listpp, 2911 NO_OFFSET, 2912 (sp_ext_length_t)(MD_SP_START), 2913 EXTTYP_RESERVED, 2914 NO_SEQUENCE_NUMBER, 2915 NO_FLAGS, 2916 meta_sp_cmp_by_offset); 2917 meta_sp_list_insert(TEST_SETNAMEP, 2918 TEST_SOFT_PARTITION_NAMEP, 2919 extent_listpp, 2920 (sp_ext_offset_t)(free_space - MD_SP_WMSIZE), 2921 MD_SP_WMSIZE, 2922 EXTTYP_END, 2923 NO_SEQUENCE_NUMBER, 2924 NO_FLAGS, 2925 meta_sp_cmp_by_offset); 2926 meta_sp_list_freefill(extent_listpp, free_space); 2927 } 2928 return (can_use); 2929 } 2930 2931 /* 2932 * FUNCTION: meta_sp_can_create_sps() 2933 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2934 * for the set containing the device for 2935 * which the extents are to be listed 2936 * mdnamep - a reference to the mdname_t of the device 2937 * on which the soft parititions are to be created 2938 * number_of_sps - the desired number of soft partitions 2939 * sp_size - the desired soft partition size 2940 * OUTPUT: boolean_t return value 2941 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 2942 * B_FALSE if not 2943 * PURPOSE: determines whether a set of soft partitions can be created 2944 * on a device 2945 */ 2946 boolean_t 2947 meta_sp_can_create_sps( 2948 mdsetname_t *mdsetnamep, 2949 mdname_t *mdnamep, 2950 int number_of_sps, 2951 blkcnt_t sp_size 2952 ) 2953 { 2954 sp_ext_node_t *extent_listp; 2955 boolean_t succeeded; 2956 md_error_t mde; 2957 2958 if ((number_of_sps > 0) && (sp_size > 0)) { 2959 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 2960 &extent_listp, &mde); 2961 } else { 2962 succeeded = B_FALSE; 2963 } 2964 2965 /* 2966 * We don't really care about an error return from the 2967 * alignment call; that will just result in passing zero, 2968 * which will be interpreted as no alignment. 
2969 */ 2970 2971 if (succeeded == B_TRUE) { 2972 succeeded = meta_sp_enough_space(number_of_sps, 2973 sp_size, &extent_listp, 2974 meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde)); 2975 meta_sp_list_free(&extent_listp); 2976 } 2977 return (succeeded); 2978 } 2979 2980 /* 2981 * FUNCTION: meta_sp_can_create_sps_on_drive() 2982 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 2983 * for the set containing the drive for 2984 * which the extents are to be listed 2985 * mddrivenamep - a reference to the mddrivename_t of the drive 2986 * on which the soft parititions are to be created 2987 * number_of_sps - the desired number of soft partitions 2988 * sp_size - the desired soft partition size 2989 * OUTPUT: boolean_t return value 2990 * RETURNS: boolean_t - B_TRUE if the soft partitionns can be created, 2991 * B_FALSE if not 2992 * PURPOSE: determines whether a set of soft partitions can be created 2993 * on a drive if the entire drive is soft partitioned 2994 */ 2995 boolean_t 2996 meta_sp_can_create_sps_on_drive( 2997 mdsetname_t *mdsetnamep, 2998 mddrivename_t *mddrivenamep, 2999 int number_of_sps, 3000 blkcnt_t sp_size 3001 ) 3002 { 3003 sp_ext_node_t *extent_listp; 3004 boolean_t succeeded; 3005 3006 if ((number_of_sps > 0) && (sp_size > 0)) { 3007 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3008 mddrivenamep, 3009 &extent_listp); 3010 } else { 3011 succeeded = B_FALSE; 3012 } 3013 3014 /* 3015 * We don't care about alignment on the space call because 3016 * we're specifically dealing with a drive, which will have no 3017 * inherent alignment. 
3018 */ 3019 3020 if (succeeded == B_TRUE) { 3021 succeeded = meta_sp_enough_space(number_of_sps, sp_size, 3022 &extent_listp, SP_UNALIGNED); 3023 meta_sp_list_free(&extent_listp); 3024 } 3025 return (succeeded); 3026 } 3027 3028 /* 3029 * FUNCTION: meta_sp_get_free_space() 3030 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3031 * for the set containing the device for 3032 * which the free space is to be returned 3033 * mdnamep - a reference to the mdname_t of the device 3034 * for which the free space is to be returned 3035 * OUTPUT: blkcnt_t return value 3036 * RETURNS: blkcnt_t - the number of blocks of free space on the device 3037 * PURPOSE: returns the number of blocks of free space on a device 3038 */ 3039 blkcnt_t 3040 meta_sp_get_free_space( 3041 mdsetname_t *mdsetnamep, 3042 mdname_t *mdnamep 3043 ) 3044 { 3045 sp_ext_node_t *extent_listp; 3046 sp_ext_length_t free_blocks; 3047 boolean_t succeeded; 3048 md_error_t mde; 3049 3050 extent_listp = NULL; 3051 free_blocks = 0; 3052 succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep, 3053 &extent_listp, &mde); 3054 if (succeeded == B_TRUE) { 3055 free_blocks = meta_sp_list_size(extent_listp, 3056 EXTTYP_FREE, INCLUDE_WM); 3057 meta_sp_list_free(&extent_listp); 3058 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3059 /* 3060 * Subtract a safety margin for watermarks when 3061 * computing the number of blocks available for 3062 * use. The actual number of watermarks can't 3063 * be calculated without knowing the exact numbers 3064 * and sizes of both the free extents and the soft 3065 * partitions to be created. The calculation is 3066 * highly complex and error-prone even if those 3067 * quantities are known. The approximate value 3068 * 10 * MD_SP_WMSIZE is within a few blocks of the 3069 * correct value in all practical cases. 
3070 */ 3071 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3072 } else { 3073 free_blocks = 0; 3074 } 3075 } else { 3076 mdclrerror(&mde); 3077 } 3078 3079 return (free_blocks); 3080 } 3081 3082 /* 3083 * FUNCTION: meta_sp_get_free_space_on_drive() 3084 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3085 * for the set containing the drive for 3086 * which the free space is to be returned 3087 * mddrivenamep - a reference to the mddrivename_t of the drive 3088 * for which the free space is to be returned 3089 * OUTPUT: blkcnt_t return value 3090 * RETURNS: blkcnt_t - the number of blocks of free space on the drive 3091 * PURPOSE: returns the number of blocks of space usable for soft 3092 * partitions on an entire drive, if the entire drive is 3093 * soft partitioned 3094 */ 3095 blkcnt_t 3096 meta_sp_get_free_space_on_drive( 3097 mdsetname_t *mdsetnamep, 3098 mddrivename_t *mddrivenamep 3099 ) 3100 { 3101 sp_ext_node_t *extent_listp; 3102 sp_ext_length_t free_blocks; 3103 boolean_t succeeded; 3104 3105 extent_listp = NULL; 3106 free_blocks = 0; 3107 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3108 mddrivenamep, &extent_listp); 3109 if (succeeded == B_TRUE) { 3110 free_blocks = meta_sp_list_size(extent_listp, 3111 EXTTYP_FREE, INCLUDE_WM); 3112 meta_sp_list_free(&extent_listp); 3113 if (free_blocks > (10 * MD_SP_WMSIZE)) { 3114 /* 3115 * Subtract a safety margin for watermarks when 3116 * computing the number of blocks available for 3117 * use. The actual number of watermarks can't 3118 * be calculated without knowing the exact numbers 3119 * and sizes of both the free extents and the soft 3120 * partitions to be created. The calculation is 3121 * highly complex and error-prone even if those 3122 * quantities are known. The approximate value 3123 * 10 * MD_SP_WMSIZE is within a few blocks of the 3124 * correct value in all practical cases. 
3125 */ 3126 free_blocks = free_blocks - (10 * MD_SP_WMSIZE); 3127 } else { 3128 free_blocks = 0; 3129 } 3130 } 3131 return (free_blocks); 3132 } 3133 3134 /* 3135 * FUNCTION: meta_sp_get_number_of_possible_sps() 3136 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3137 * for the set containing the device for 3138 * which the number of possible soft partitions 3139 * is to be returned 3140 * mdnamep - a reference to the mdname_t of the device 3141 * for which the number of possible soft partitions 3142 * is to be returned 3143 * OUTPUT: int return value 3144 * RETURNS: int - the number of soft partitions of the desired size 3145 * that can be created on the device 3146 * PURPOSE: returns the number of soft partitions of a given size 3147 * that can be created on a device 3148 */ 3149 int 3150 meta_sp_get_number_of_possible_sps( 3151 mdsetname_t *mdsetnamep, 3152 mdname_t *mdnamep, 3153 blkcnt_t sp_size 3154 ) 3155 { 3156 sp_ext_node_t *extent_listp; 3157 int number_of_possible_sps; 3158 boolean_t succeeded; 3159 md_error_t mde; 3160 sp_ext_length_t alignment; 3161 3162 extent_listp = NULL; 3163 number_of_possible_sps = 0; 3164 if (sp_size > 0) { 3165 if ((succeeded = meta_sp_get_extent_list(mdsetnamep, 3166 mdnamep, &extent_listp, &mde)) == B_FALSE) 3167 mdclrerror(&mde); 3168 } else { 3169 succeeded = B_FALSE; 3170 } 3171 3172 if (succeeded == B_TRUE) { 3173 alignment = meta_sp_get_default_alignment(mdsetnamep, 3174 mdnamep, &mde); 3175 } 3176 3177 while (succeeded == B_TRUE) { 3178 /* 3179 * Keep allocating space from the extent list 3180 * for soft partitions of the desired size until 3181 * there's not enough free space left in the list 3182 * for another soft partiition of that size. 3183 * Add one to the number of possible soft partitions 3184 * for each soft partition for which there is 3185 * enough free space left. 
3186 */ 3187 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3188 sp_size, &extent_listp, alignment); 3189 if (succeeded == B_TRUE) { 3190 number_of_possible_sps++; 3191 } 3192 } 3193 if (extent_listp != NULL) { 3194 meta_sp_list_free(&extent_listp); 3195 } 3196 return (number_of_possible_sps); 3197 } 3198 3199 /* 3200 * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive() 3201 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3202 * for the set containing the drive for 3203 * which the number of possible soft partitions 3204 * is to be returned 3205 * mddrivenamep - a reference to the mddrivename_t of the drive 3206 * for which the number of possible soft partitions 3207 * is to be returned 3208 * sp_size - the size in blocks of the proposed soft partitions 3209 * OUTPUT: int return value 3210 * RETURNS: int - the number of soft partitions of the desired size 3211 * that can be created on the drive 3212 * PURPOSE: returns the number of soft partitions of a given size 3213 * that can be created on a drive, if the entire drive is 3214 * soft partitioned 3215 */ 3216 int 3217 meta_sp_get_number_of_possible_sps_on_drive( 3218 mdsetname_t *mdsetnamep, 3219 mddrivename_t *mddrivenamep, 3220 blkcnt_t sp_size 3221 ) 3222 { 3223 sp_ext_node_t *extent_listp; 3224 int number_of_possible_sps; 3225 boolean_t succeeded; 3226 3227 extent_listp = NULL; 3228 number_of_possible_sps = 0; 3229 if (sp_size > 0) { 3230 succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep, 3231 mddrivenamep, &extent_listp); 3232 } else { 3233 succeeded = B_FALSE; 3234 } 3235 while (succeeded == B_TRUE) { 3236 /* 3237 * Keep allocating space from the extent list 3238 * for soft partitions of the desired size until 3239 * there's not enough free space left in the list 3240 * for another soft partition of that size. 3241 * Add one to the number of possible soft partitions 3242 * for each soft partition for which there is 3243 * enough free space left. 
3244 * 3245 * Since it's a drive, not a metadevice, make no 3246 * assumptions about alignment. 3247 */ 3248 succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION, 3249 sp_size, &extent_listp, SP_UNALIGNED); 3250 if (succeeded == B_TRUE) { 3251 number_of_possible_sps++; 3252 } 3253 } 3254 if (extent_listp != NULL) { 3255 meta_sp_list_free(&extent_listp); 3256 } 3257 return (number_of_possible_sps); 3258 } 3259 3260 /* 3261 * FUNCTION: meta_sp_get_possible_sp_size() 3262 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3263 * for the set containing the device for 3264 * which the possible soft partition size 3265 * is to be returned 3266 * mdnamep - a reference to the mdname_t of the device 3267 * for which the possible soft partition size 3268 * is to be returned 3269 * number_of_sps - the desired number of soft partitions 3270 * OUTPUT: blkcnt_t return value 3271 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3272 * PURPOSE: returns the maximum possible size of each of a given number of 3273 * soft partitions of equal size that can be created on a device 3274 */ 3275 blkcnt_t 3276 meta_sp_get_possible_sp_size( 3277 mdsetname_t *mdsetnamep, 3278 mdname_t *mdnamep, 3279 int number_of_sps 3280 ) 3281 { 3282 blkcnt_t free_blocks; 3283 blkcnt_t sp_size; 3284 boolean_t succeeded; 3285 3286 sp_size = 0; 3287 if (number_of_sps > 0) { 3288 free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep); 3289 sp_size = free_blocks / number_of_sps; 3290 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3291 number_of_sps, sp_size); 3292 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3293 /* 3294 * To compensate for space that may have been 3295 * occupied by watermarks, reduce sp_size by a 3296 * number of blocks equal to the number of soft 3297 * partitions desired, and test again to see 3298 * whether the desired number of soft partitions 3299 * can be created. 
3300 */ 3301 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3302 succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep, 3303 number_of_sps, sp_size); 3304 } 3305 if (sp_size < 0) { 3306 sp_size = 0; 3307 } 3308 } 3309 return (sp_size); 3310 } 3311 3312 /* 3313 * FUNCTION: meta_sp_get_possible_sp_size_on_drive() 3314 * INPUT: mdsetnamep - a reference to the mdsetname_t structure 3315 * for the set containing the drive for 3316 * which the possible soft partition size 3317 * is to be returned 3318 * mddrivenamep - a reference to the mddrivename_t of the drive 3319 * for which the possible soft partition size 3320 * is to be returned 3321 * number_of_sps - the desired number of soft partitions 3322 * OUTPUT: blkcnt_t return value 3323 * RETURNS: blkcnt_t - the possible soft partition size in blocks 3324 * PURPOSE: returns the maximum possible size of each of a given number of 3325 * soft partitions of equal size that can be created on a drive 3326 * if the entire drive is soft partitioned 3327 */ 3328 blkcnt_t 3329 meta_sp_get_possible_sp_size_on_drive( 3330 mdsetname_t *mdsetnamep, 3331 mddrivename_t *mddrivenamep, 3332 int number_of_sps 3333 ) 3334 { 3335 blkcnt_t free_blocks; 3336 blkcnt_t sp_size; 3337 boolean_t succeeded; 3338 3339 sp_size = 0; 3340 if (number_of_sps > 0) { 3341 free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep, 3342 mddrivenamep); 3343 sp_size = free_blocks / number_of_sps; 3344 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3345 mddrivenamep, 3346 number_of_sps, sp_size); 3347 while ((succeeded == B_FALSE) && (sp_size > 0)) { 3348 /* 3349 * To compensate for space that may have been 3350 * occupied by watermarks, reduce sp_size by a 3351 * number of blocks equal to the number of soft 3352 * partitions desired, and test again to see 3353 * whether the desired number of soft partitions 3354 * can be created. 
3355 */ 3356 sp_size = sp_size - ((blkcnt_t)number_of_sps); 3357 succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep, 3358 mddrivenamep, 3359 number_of_sps, sp_size); 3360 } 3361 if (sp_size < 0) { 3362 sp_size = 0; 3363 } 3364 } 3365 return (sp_size); 3366 } 3367 3368 /* 3369 * ************************************************************************** 3370 * Unit Structure Manipulation Functions * 3371 * ************************************************************************** 3372 */ 3373 3374 /* 3375 * FUNCTION: meta_sp_fillextarray() 3376 * INPUT: mp - the unit structure to fill 3377 * extlist - the list of extents to fill with 3378 * OUTPUT: none 3379 * RETURNS: void 3380 * PURPOSE: fills in the unit structure extent list with the extents 3381 * specified by extlist. Only extents in extlist with the 3382 * EXTFLG_UPDATE flag are changed in the unit structure, 3383 * and the index into the unit structure is the sequence 3384 * number in the extent list. After all of the nodes have 3385 * been updated the virtual offsets in the unit structure 3386 * are updated to reflect the new lengths. 
3387 */ 3388 static void 3389 meta_sp_fillextarray( 3390 mp_unit_t *mp, 3391 sp_ext_node_t *extlist 3392 ) 3393 { 3394 int i; 3395 sp_ext_node_t *ext; 3396 sp_ext_offset_t curvoff = 0LL; 3397 3398 assert(mp != NULL); 3399 3400 /* go through the allocation list and fill in our unit structure */ 3401 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 3402 if ((ext->ext_type == EXTTYP_ALLOC) && 3403 (ext->ext_flags & EXTFLG_UPDATE) != 0) { 3404 mp->un_ext[ext->ext_seq].un_poff = 3405 ext->ext_offset + MD_SP_WMSIZE; 3406 mp->un_ext[ext->ext_seq].un_len = 3407 ext->ext_length - MD_SP_WMSIZE; 3408 } 3409 } 3410 3411 for (i = 0; i < mp->un_numexts; i++) { 3412 assert(mp->un_ext[i].un_poff != 0); 3413 assert(mp->un_ext[i].un_len != 0); 3414 mp->un_ext[i].un_voff = curvoff; 3415 curvoff += mp->un_ext[i].un_len; 3416 } 3417 } 3418 3419 /* 3420 * FUNCTION: meta_sp_createunit() 3421 * INPUT: np - the name of the device to create a unit structure for 3422 * compnp - the name of the device the soft partition is on 3423 * extlist - the extent list to populate the new unit with 3424 * numexts - the number of extents in the extent list 3425 * len - the total size of the soft partition (sectors) 3426 * status - the initial status of the unit structure 3427 * OUTPUT: ep - return error pointer 3428 * RETURNS: mp_unit_t * - the new unit structure. 3429 * PURPOSE: allocates and fills in a new soft partition unit 3430 * structure to be passed to the soft partitioning driver 3431 * for creation. 
3432 */ 3433 static mp_unit_t * 3434 meta_sp_createunit( 3435 mdname_t *np, 3436 mdname_t *compnp, 3437 sp_ext_node_t *extlist, 3438 int numexts, 3439 sp_ext_length_t len, 3440 sp_status_t status, 3441 md_error_t *ep 3442 ) 3443 { 3444 mp_unit_t *mp; 3445 uint_t ms_size; 3446 3447 ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) + 3448 (numexts * sizeof (mp->un_ext[0])); 3449 3450 mp = Zalloc(ms_size); 3451 3452 /* fill in fields in common unit structure */ 3453 mp->c.un_type = MD_METASP; 3454 mp->c.un_size = ms_size; 3455 MD_SID(mp) = meta_getminor(np->dev); 3456 mp->c.un_total_blocks = len; 3457 mp->c.un_actual_tb = len; 3458 3459 /* set up geometry */ 3460 (void) meta_sp_setgeom(np, compnp, mp, ep); 3461 3462 /* if we're building on metadevice we can't parent */ 3463 if (metaismeta(compnp)) 3464 MD_CAPAB(mp) = MD_CANT_PARENT; 3465 else 3466 MD_CAPAB(mp) = MD_CAN_PARENT; 3467 3468 /* fill soft partition-specific fields */ 3469 mp->un_dev = compnp->dev; 3470 mp->un_key = compnp->key; 3471 3472 /* mdname_t start_blk field is not 64-bit! */ 3473 mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk; 3474 mp->un_status = status; 3475 mp->un_numexts = numexts; 3476 mp->un_length = len; 3477 3478 /* fill in the extent array */ 3479 meta_sp_fillextarray(mp, extlist); 3480 3481 return (mp); 3482 } 3483 3484 /* 3485 * FUNCTION: meta_sp_updateunit() 3486 * INPUT: np - name structure for the metadevice being updated 3487 * old_un - the original unit structure that is being updated 3488 * extlist - the extent list to populate the new unit with 3489 * grow_len - the amount by which the partition is being grown 3490 * numexts - the number of extents in the extent list 3491 * ep - return error pointer 3492 * OUTPUT: none 3493 * RETURNS: mp_unit_t * - the updated unit structure 3494 * PURPOSE: allocates and fills in a new soft partition unit structure to 3495 * be passed to the soft partitioning driver for creation. 
The 3496 * old unit structure is first copied in, and then the updated 3497 * extents are changed in the new unit structure. This is 3498 * typically used when the size of an existing unit is changed. 3499 */ 3500 static mp_unit_t * 3501 meta_sp_updateunit( 3502 mdname_t *np, 3503 mp_unit_t *old_un, 3504 sp_ext_node_t *extlist, 3505 sp_ext_length_t grow_len, 3506 int numexts, 3507 md_error_t *ep 3508 ) 3509 { 3510 mp_unit_t *new_un; 3511 sp_ext_length_t new_len; 3512 uint_t new_size; 3513 3514 assert(old_un != NULL); 3515 assert(extlist != NULL); 3516 3517 /* allocate new unit structure and copy in old unit */ 3518 new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) + 3519 ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0])); 3520 new_len = old_un->un_length + grow_len; 3521 new_un = Zalloc(new_size); 3522 bcopy(old_un, new_un, old_un->c.un_size); 3523 3524 /* update size and geometry information */ 3525 new_un->c.un_size = new_size; 3526 new_un->un_length = new_len; 3527 new_un->c.un_total_blocks = new_len; 3528 new_un->c.un_actual_tb = new_len; 3529 if (meta_adjust_geom((md_unit_t *)new_un, np, 3530 old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct, 3531 0, ep) != 0) { 3532 Free(new_un); 3533 return (NULL); 3534 } 3535 3536 /* update extent information */ 3537 new_un->un_numexts += numexts; 3538 3539 meta_sp_fillextarray(new_un, extlist); 3540 3541 return (new_un); 3542 } 3543 3544 /* 3545 * FUNCTION: meta_get_sp() 3546 * INPUT: sp - the set name for the device to get 3547 * np - the name of the device to get 3548 * OUTPUT: ep - return error pointer 3549 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition 3550 * PURPOSE: interface to the rest of libmeta for fetching a unit structure 3551 * for the named device. Just a wrapper for meta_get_sp_common(). 
3552 */ 3553 md_sp_t * 3554 meta_get_sp( 3555 mdsetname_t *sp, 3556 mdname_t *np, 3557 md_error_t *ep 3558 ) 3559 { 3560 return (meta_get_sp_common(sp, np, 0, ep)); 3561 } 3562 3563 /* 3564 * FUNCTION: meta_get_sp_common() 3565 * INPUT: sp - the set name for the device to get 3566 * np - the name of the device to get 3567 * fast - whether to use the cache or not (NOT IMPLEMENTED!) 3568 * OUTPUT: ep - return error pointer 3569 * RETURNS: md_sp_t * - the XDR unit structure for the soft partition, 3570 * NULL if np is not a soft partition 3571 * PURPOSE: common routine for fetching a soft partition unit structure 3572 */ 3573 md_sp_t * 3574 meta_get_sp_common( 3575 mdsetname_t *sp, 3576 mdname_t *np, 3577 int fast, 3578 md_error_t *ep 3579 ) 3580 { 3581 mddrivename_t *dnp = np->drivenamep; 3582 char *miscname; 3583 mp_unit_t *mp; 3584 md_sp_t *msp; 3585 int i; 3586 3587 /* must have set */ 3588 assert(sp != NULL); 3589 3590 /* short circuit */ 3591 if (dnp->unitp != NULL) { 3592 if (dnp->unitp->type != MD_METASP) 3593 return (NULL); 3594 return ((md_sp_t *)dnp->unitp); 3595 } 3596 /* get miscname and unit */ 3597 if ((miscname = metagetmiscname(np, ep)) == NULL) 3598 return (NULL); 3599 3600 if (strcmp(miscname, MD_SP) != 0) { 3601 (void) mdmderror(ep, MDE_NOT_SP, 0, np->cname); 3602 return (NULL); 3603 } 3604 3605 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 3606 return (NULL); 3607 3608 assert(mp->c.un_type == MD_METASP); 3609 3610 /* allocate soft partition */ 3611 msp = Zalloc(sizeof (*msp)); 3612 3613 /* get the common information */ 3614 msp->common.namep = np; 3615 msp->common.type = mp->c.un_type; 3616 msp->common.state = mp->c.un_status; 3617 msp->common.capabilities = mp->c.un_capabilities; 3618 msp->common.parent = mp->c.un_parent; 3619 msp->common.size = mp->c.un_total_blocks; 3620 msp->common.user_flags = mp->c.un_user_flags; 3621 msp->common.revision = mp->c.un_revision; 3622 3623 /* get soft partition information */ 3624 if 
((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL) 3625 goto out; 3626 3627 /* 3628 * Fill in the key and the start block. Note that the start 3629 * block in the unit structure is 64 bits but the name pointer 3630 * only supports 32 bits. 3631 */ 3632 msp->compnamep->key = mp->un_key; 3633 msp->compnamep->start_blk = mp->un_start_blk; 3634 3635 /* fill in status field */ 3636 msp->status = mp->un_status; 3637 3638 /* allocate the extents */ 3639 msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val)); 3640 msp->ext.ext_len = mp->un_numexts; 3641 3642 /* do the extents for this soft partition */ 3643 for (i = 0; i < mp->un_numexts; i++) { 3644 struct mp_ext *mde = &mp->un_ext[i]; 3645 md_sp_ext_t *extp = &msp->ext.ext_val[i]; 3646 3647 extp->voff = mde->un_voff; 3648 extp->poff = mde->un_poff; 3649 extp->len = mde->un_len; 3650 } 3651 3652 /* cleanup, return success */ 3653 Free(mp); 3654 dnp->unitp = (md_common_t *)msp; 3655 return (msp); 3656 3657 out: 3658 /* clean up and return error */ 3659 Free(mp); 3660 Free(msp); 3661 return (NULL); 3662 } 3663 3664 3665 /* 3666 * FUNCTION: meta_init_sp() 3667 * INPUT: spp - the set name for the new device 3668 * argc - the remaining argument count for the metainit cmdline 3669 * argv - the remainder of the unparsed command line 3670 * options - global options parsed by metainit 3671 * OUTPUT: ep - return error pointer 3672 * RETURNS: int - -1 failure, 0 success 3673 * PURPOSE: provides the command line parsing and name management overhead 3674 * for creating a new soft partition. Ultimately this calls 3675 * meta_create_sp() which does the real work of allocating space 3676 * for the new soft partition. 
3677 */ 3678 int 3679 meta_init_sp( 3680 mdsetname_t **spp, 3681 int argc, 3682 char *argv[], 3683 mdcmdopts_t options, 3684 md_error_t *ep 3685 ) 3686 { 3687 char *compname = NULL; 3688 mdname_t *spcompnp = NULL; /* name of component volume */ 3689 char *devname = argv[0]; /* unit name */ 3690 mdname_t *np = NULL; /* name of soft partition */ 3691 md_sp_t *msp = NULL; 3692 int c; 3693 int old_optind; 3694 sp_ext_length_t len = 0LL; 3695 int rval = -1; 3696 uint_t seq; 3697 int oflag; 3698 int failed; 3699 mddrivename_t *dnp = NULL; 3700 sp_ext_length_t alignment = 0LL; 3701 sp_ext_node_t *extlist = NULL; 3702 3703 assert(argc > 0); 3704 3705 /* expect sp name, -p, optional -e, compname, and size parameters */ 3706 /* grab soft partition name */ 3707 if ((np = metaname(spp, devname, ep)) == NULL) 3708 goto out; 3709 3710 /* see if it exists already */ 3711 if (metagetmiscname(np, ep) != NULL) { 3712 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 3713 meta_getminor(np->dev), devname); 3714 goto out; 3715 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 3716 goto out; 3717 } else { 3718 mdclrerror(ep); 3719 } 3720 --argc, ++argv; 3721 3722 if (argc == 0) 3723 goto syntax; 3724 3725 /* grab -p */ 3726 if (strcmp(argv[0], "-p") != 0) 3727 goto syntax; 3728 --argc, ++argv; 3729 3730 if (argc == 0) 3731 goto syntax; 3732 3733 /* see if -e is there */ 3734 if (strcmp(argv[0], "-e") == 0) { 3735 /* use the whole disk */ 3736 options |= MDCMD_USE_WHOLE_DISK; 3737 --argc, ++argv; 3738 } 3739 3740 if (argc == 0) 3741 goto syntax; 3742 3743 /* get component name */ 3744 compname = Strdup(argv[0]); 3745 3746 if (options & MDCMD_USE_WHOLE_DISK) { 3747 if ((dnp = metadrivename(spp, compname, ep)) == NULL) { 3748 goto out; 3749 } 3750 if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) { 3751 goto out; 3752 } 3753 } else if ((spcompnp = metaname(spp, compname, ep)) == NULL) { 3754 goto out; 3755 } 3756 assert(*spp != NULL); 3757 3758 if (!(options & MDCMD_NOLOCK)) { 3759 /* grab set lock */ 3760 if (meta_lock(*spp, TRUE, ep)) 3761 goto out; 3762 3763 if (meta_check_ownership(*spp, ep) != 0) 3764 goto out; 3765 } 3766 3767 /* allocate the soft partition */ 3768 msp = Zalloc(sizeof (*msp)); 3769 3770 /* setup common */ 3771 msp->common.namep = np; 3772 msp->common.type = MD_METASP; 3773 3774 compname = spcompnp->cname; 3775 3776 assert(spcompnp->rname != NULL); 3777 --argc, ++argv; 3778 3779 if (argc == 0) { 3780 goto syntax; 3781 } 3782 3783 if (*argv[0] == '-') { 3784 /* 3785 * parse any other command line options, this includes 3786 * the recovery options -o and -b. 
The special thing 3787 * with these options is that the len needs to be 3788 * kept track of otherwise when the geometry of the 3789 * "device" is built it will create an invalid geometry 3790 */ 3791 old_optind = optind = 0; 3792 opterr = 0; 3793 oflag = 0; 3794 seq = 0; 3795 failed = 0; 3796 while ((c = getopt(argc, argv, "A:o:b:")) != -1) { 3797 sp_ext_offset_t offset; 3798 sp_ext_length_t length; 3799 longlong_t tmp_size; 3800 3801 switch (c) { 3802 case 'A': /* data alignment */ 3803 if (meta_sp_parsesizestring(optarg, 3804 &alignment) == -1) { 3805 failed = 1; 3806 } 3807 break; 3808 case 'o': /* offset in the partition */ 3809 if (oflag == 1) { 3810 failed = 1; 3811 } else { 3812 tmp_size = atoll(optarg); 3813 if (tmp_size <= 0) { 3814 failed = 1; 3815 } else { 3816 oflag = 1; 3817 options |= MDCMD_DIRECT; 3818 3819 offset = tmp_size; 3820 } 3821 } 3822 3823 break; 3824 case 'b': /* number of blocks */ 3825 if (oflag == 0) { 3826 failed = 1; 3827 } else { 3828 tmp_size = atoll(optarg); 3829 if (tmp_size <= 0) { 3830 failed = 1; 3831 } else { 3832 oflag = 0; 3833 3834 length = tmp_size; 3835 3836 /* we have a pair of values */ 3837 meta_sp_list_insert(*spp, np, 3838 &extlist, offset, 3839 length, EXTTYP_ALLOC, 3840 seq++, EXTFLG_UPDATE, 3841 meta_sp_cmp_by_offset); 3842 len += length; 3843 } 3844 } 3845 3846 break; 3847 default: 3848 argc -= old_optind; 3849 argv += old_optind; 3850 goto options; 3851 } 3852 3853 if (failed) { 3854 argc -= old_optind; 3855 argv += old_optind; 3856 goto syntax; 3857 } 3858 3859 old_optind = optind; 3860 } 3861 argc -= optind; 3862 argv += optind; 3863 3864 /* 3865 * Must have matching pairs of -o and -b flags 3866 */ 3867 if (oflag != 0) 3868 goto syntax; 3869 3870 /* 3871 * Can't specify both layout (indicated indirectly by 3872 * len being set by thye -o/-b cases above) AND 3873 * alignment 3874 */ 3875 if ((len > 0LL) && (alignment > 0LL)) 3876 goto syntax; 3877 3878 /* 3879 * sanity check the allocation list 3880 */ 3881 
if ((extlist != NULL) && meta_sp_list_overlaps(extlist)) 3882 goto syntax; 3883 } 3884 3885 if (len == 0LL) { 3886 if (argc == 0) 3887 goto syntax; 3888 if (meta_sp_parsesize(argv[0], &len) == -1) 3889 goto syntax; 3890 --argc, ++argv; 3891 } 3892 3893 msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val)); 3894 msp->ext.ext_val->len = len; 3895 msp->compnamep = spcompnp; 3896 3897 /* we should be at the end */ 3898 if (argc != 0) 3899 goto syntax; 3900 3901 /* create soft partition */ 3902 if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0) 3903 goto out; 3904 rval = 0; 3905 3906 /* let em know */ 3907 if (options & MDCMD_PRINT) { 3908 (void) printf(dgettext(TEXT_DOMAIN, 3909 "%s: Soft Partition is setup\n"), 3910 devname); 3911 (void) fflush(stdout); 3912 } 3913 goto out; 3914 3915 syntax: 3916 /* syntax error */ 3917 rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv); 3918 goto out; 3919 3920 options: 3921 /* options error */ 3922 rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv); 3923 goto out; 3924 3925 out: 3926 if (msp != NULL) { 3927 if (msp->ext.ext_val != NULL) { 3928 Free(msp->ext.ext_val); 3929 } 3930 Free(msp); 3931 } 3932 3933 return (rval); 3934 } 3935 3936 /* 3937 * FUNCTION: meta_free_sp() 3938 * INPUT: msp - the soft partition unit to free 3939 * OUTPUT: none 3940 * RETURNS: void 3941 * PURPOSE: provides an interface from the rest of libmeta for freeing a 3942 * soft partition unit 3943 */ 3944 void 3945 meta_free_sp(md_sp_t *msp) 3946 { 3947 Free(msp); 3948 } 3949 3950 /* 3951 * FUNCTION: meta_sp_issp() 3952 * INPUT: sp - the set name to check 3953 * np - the name to check 3954 * OUTPUT: ep - return error pointer 3955 * RETURNS: int - 0 means sp,np is a soft partition 3956 * 1 means sp,np is not a soft partition 3957 * PURPOSE: determines whether the given device is a soft partition 3958 * device. This is called by other metadevice check routines. 
3959 */ 3960 int 3961 meta_sp_issp( 3962 mdsetname_t *sp, 3963 mdname_t *np, 3964 md_error_t *ep 3965 ) 3966 { 3967 if (meta_get_sp_common(sp, np, 0, ep) == NULL) 3968 return (1); 3969 3970 return (0); 3971 } 3972 3973 /* 3974 * FUNCTION: meta_check_sp() 3975 * INPUT: sp - the set name to check 3976 * msp - the unit structure to check 3977 * options - creation options 3978 * OUTPUT: repart_options - options to be passed to 3979 * meta_repartition_drive() 3980 * ep - return error pointer 3981 * RETURNS: int - 0 ok to create on this component 3982 * -1 error or not ok to create on this component 3983 * PURPOSE: Checks to determine whether the rules for creation of 3984 * soft partitions allow creation of a soft partition on 3985 * the device described by the mdname_t structure referred 3986 * to by msp->compnamep. 3987 * 3988 * NOTE: Does NOT check to determine whether the extents 3989 * described in the md_sp_t structure referred to by 3990 * msp will fit on the device described by the mdname_t 3991 * structure located at msp->compnamep. 3992 */ 3993 static int 3994 meta_check_sp( 3995 mdsetname_t *sp, 3996 md_sp_t *msp, 3997 mdcmdopts_t options, 3998 int *repart_options, 3999 md_error_t *ep 4000 ) 4001 { 4002 md_common_t *mdp; 4003 mdname_t *compnp = msp->compnamep; 4004 uint_t slice; 4005 mddrivename_t *dnp; 4006 mdname_t *slicenp; 4007 mdvtoc_t *vtocp; 4008 4009 /* make sure it is in the set */ 4010 if (meta_check_inset(sp, compnp, ep) != 0) 4011 return (-1); 4012 4013 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4014 uint_t rep_slice; 4015 4016 /* 4017 * check to make sure we can partition this drive. 4018 * we cannot continue if any of the following are 4019 * true: 4020 * The drive is a metadevice. 4021 * The drive contains a mounted slice. 4022 * The drive contains a slice being swapped to. 4023 * The drive contains slices which are part of other 4024 * metadevices. 4025 * The drive contains a metadb. 
4026 */ 4027 if (metaismeta(compnp)) 4028 return (mddeverror(ep, MDE_IS_META, compnp->dev, 4029 compnp->cname)); 4030 4031 assert(compnp->drivenamep != NULL); 4032 4033 /* 4034 * ensure that we have slice 0 since the disk will be 4035 * repartitioned in the USE_WHOLE_DISK case. this check 4036 * is redundant unless the user incorrectly specifies a 4037 * a fully qualified drive AND slice name (i.e., 4038 * /dev/dsk/cXtXdXsX), which will be incorrectly 4039 * recognized as a drive name by the metaname code. 4040 */ 4041 4042 if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL) 4043 return (-1); 4044 if (slice != MD_SLICE0) 4045 return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname)); 4046 4047 dnp = compnp->drivenamep; 4048 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 4049 return (-1); 4050 4051 for (slice = 0; slice < vtocp->nparts; slice++) { 4052 4053 /* only check if the slice really exists */ 4054 if (vtocp->parts[slice].size == 0) 4055 continue; 4056 4057 slicenp = metaslicename(dnp, slice, ep); 4058 if (slicenp == NULL) 4059 return (-1); 4060 4061 /* check to ensure that it is not already in use */ 4062 if (meta_check_inuse(sp, 4063 slicenp, MDCHK_INUSE, ep) != 0) { 4064 return (-1); 4065 } 4066 4067 /* 4068 * Up to this point, tests are applied to all 4069 * slices uniformly. 4070 */ 4071 4072 if (slice == rep_slice) { 4073 /* 4074 * Tests inside the body of this 4075 * conditional are applied only to 4076 * slice seven. 4077 */ 4078 if (meta_check_inmeta(sp, slicenp, 4079 options | MDCHK_ALLOW_MDDB | 4080 MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0) 4081 return (-1); 4082 4083 /* 4084 * For slice seven, a metadb is NOT an 4085 * automatic failure. It merely means 4086 * that we're not allowed to muck 4087 * about with the partitioning of that 4088 * slice. We indicate this by masking 4089 * in the MD_REPART_LEAVE_REP flag. 
4090 */ 4091 if (metahasmddb(sp, slicenp, ep)) { 4092 assert(repart_options != 4093 NULL); 4094 *repart_options |= 4095 MD_REPART_LEAVE_REP; 4096 } 4097 4098 /* 4099 * Skip the remaining tests for slice 4100 * seven 4101 */ 4102 continue; 4103 } 4104 4105 /* 4106 * Tests below this point will be applied to 4107 * all slices EXCEPT for the replica slice. 4108 */ 4109 4110 4111 /* check if component is in a metadevice */ 4112 if (meta_check_inmeta(sp, slicenp, options, 0, 4113 -1, ep) != 0) 4114 return (-1); 4115 4116 /* check to see if component has a metadb */ 4117 if (metahasmddb(sp, slicenp, ep)) 4118 return (mddeverror(ep, MDE_HAS_MDDB, 4119 slicenp->dev, slicenp->cname)); 4120 } 4121 /* 4122 * This should be all of the testing necessary when 4123 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of 4124 * meta_check_sp() is oriented towards component 4125 * arguments instead of disks. 4126 */ 4127 goto meta_check_sp_ok; 4128 4129 } 4130 4131 /* check to ensure that it is not already in use */ 4132 if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) { 4133 return (-1); 4134 } 4135 4136 if (!metaismeta(compnp)) { /* handle non-metadevices */ 4137 4138 /* 4139 * The component can have one or more soft partitions on it 4140 * already, but can't be part of any other type of metadevice, 4141 * so if it is used for a metadevice, but the metadevice 4142 * isn't a soft partition, return failure. 4143 */ 4144 4145 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 && 4146 meta_check_insp(sp, compnp, 0, -1, ep) == 0) { 4147 return (-1); 4148 } 4149 } else { /* handle metadevices */ 4150 /* get underlying unit & check capabilities */ 4151 if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL) 4152 return (-1); 4153 4154 if ((! (mdp->capabilities & MD_CAN_PARENT)) || 4155 (! 
(mdp->capabilities & MD_CAN_SP))) 4156 return (mdmderror(ep, MDE_INVAL_UNIT, 4157 meta_getminor(compnp->dev), compnp->cname)); 4158 } 4159 4160 meta_check_sp_ok: 4161 mdclrerror(ep); 4162 return (0); 4163 } 4164 4165 /* 4166 * FUNCTION: meta_create_sp() 4167 * INPUT: sp - the set name to create in 4168 * msp - the unit structure to create 4169 * oblist - an optional list of requested extents (-o/-b options) 4170 * options - creation options 4171 * alignment - data alignment 4172 * OUTPUT: ep - return error pointer 4173 * RETURNS: int - 0 success, -1 error 4174 * PURPOSE: does most of the work for creating a soft partition. If 4175 * metainit -p -e was used, first partition the drive. Then 4176 * create an extent list based on the existing soft partitions 4177 * and assume all space not used by them is free. Storage for 4178 * the new soft partition is allocated from the free extents 4179 * based on the length specified on the command line or the 4180 * oblist passed in. The unit structure is then committed and 4181 * the watermarks are updated. Finally, the status is changed to 4182 * Okay and the process is complete. 
4183 */ 4184 static int 4185 meta_create_sp( 4186 mdsetname_t *sp, 4187 md_sp_t *msp, 4188 sp_ext_node_t *oblist, 4189 mdcmdopts_t options, 4190 sp_ext_length_t alignment, 4191 md_error_t *ep 4192 ) 4193 { 4194 mdname_t *np = msp->common.namep; 4195 mdname_t *compnp = msp->compnamep; 4196 mp_unit_t *mp = NULL; 4197 mdnamelist_t *keynlp = NULL, *spnlp = NULL; 4198 md_set_params_t set_params; 4199 int rval = -1; 4200 diskaddr_t comp_size; 4201 diskaddr_t sp_start; 4202 sp_ext_node_t *extlist = NULL; 4203 int numexts = 0; /* number of extents */ 4204 int count = 0; 4205 int committed = 0; 4206 int repart_options = MD_REPART_FORCE; 4207 int create_flag = MD_CRO_32BIT; 4208 4209 md_set_desc *sd; 4210 mm_unit_t *mm; 4211 md_set_mmown_params_t *ownpar = NULL; 4212 int comp_is_mirror = 0; 4213 4214 /* validate soft partition */ 4215 if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0) 4216 return (-1); 4217 4218 if ((options & MDCMD_USE_WHOLE_DISK) != 0) { 4219 if ((options & MDCMD_DOIT) != 0) { 4220 if (meta_repartition_drive(sp, 4221 compnp->drivenamep, 4222 repart_options, 4223 NULL, /* Don't return the VTOC */ 4224 ep) != 0) 4225 4226 return (-1); 4227 } else { 4228 /* 4229 * If -n and -e are both specified, it doesn't make 4230 * sense to continue without actually partitioning 4231 * the drive. 4232 */ 4233 return (0); 4234 } 4235 } 4236 4237 /* populate the start_blk field of the component name */ 4238 if ((sp_start = meta_sp_get_start(sp, compnp, ep)) == 4239 MD_DISKADDR_ERROR) { 4240 rval = -1; 4241 goto out; 4242 } 4243 4244 if (options & MDCMD_DOIT) { 4245 /* store name in namespace */ 4246 if (add_key_name(sp, compnp, &keynlp, ep) != 0) { 4247 rval = -1; 4248 goto out; 4249 } 4250 } 4251 4252 /* 4253 * Get a list of the soft partitions that currently reside on 4254 * the component. 
We should ALWAYS force reload the cache, 4255 * because if this is a single creation, there will not BE a 4256 * cached list, and if we're using the md.tab, we must rebuild 4257 * the list because it won't contain the previous (if any) 4258 * soft partition. 4259 */ 4260 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4261 if (count < 0) { 4262 /* error occured */ 4263 rval = -1; 4264 goto out; 4265 } 4266 4267 /* 4268 * get the size of the underlying device. if the size is smaller 4269 * than or equal to the watermark size, we know there isn't 4270 * enough space. 4271 */ 4272 if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) { 4273 rval = -1; 4274 goto out; 4275 } else if (comp_size <= MD_SP_WMSIZE) { 4276 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname); 4277 rval = -1; 4278 goto out; 4279 } 4280 /* 4281 * seed extlist with reserved space at the beginning of the volume and 4282 * enough space for the end watermark. The end watermark always gets 4283 * updated, but if the underlying device changes size it may not be 4284 * pointed to until the extent before it is updated. Since the 4285 * end of the reserved space is where the first watermark starts, 4286 * the reserved extent should never be marked for updating. 
4287 */ 4288 4289 meta_sp_list_insert(NULL, NULL, &extlist, 4290 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4291 meta_sp_list_insert(NULL, NULL, &extlist, 4292 (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE, 4293 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4294 4295 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4296 rval = -1; 4297 goto out; 4298 } 4299 4300 metafreenamelist(spnlp); 4301 4302 if (getenv(META_SP_DEBUG)) { 4303 meta_sp_debug("meta_create_sp: list of used extents:\n"); 4304 meta_sp_list_dump(extlist); 4305 } 4306 4307 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4308 4309 /* get extent list from -o/-b options or from free space */ 4310 if (options & MDCMD_DIRECT) { 4311 if (getenv(META_SP_DEBUG)) { 4312 meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n"); 4313 meta_sp_list_dump(oblist); 4314 } 4315 4316 numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist); 4317 if (numexts == -1) { 4318 (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname); 4319 rval = -1; 4320 goto out; 4321 } 4322 } else { 4323 numexts = meta_sp_alloc_by_len(sp, np, &extlist, 4324 &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment : 4325 meta_sp_get_default_alignment(sp, compnp, ep)); 4326 if (numexts == -1) { 4327 (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname); 4328 rval = -1; 4329 goto out; 4330 } 4331 } 4332 4333 assert(extlist != NULL); 4334 4335 /* create soft partition */ 4336 mp = meta_sp_createunit(msp->common.namep, msp->compnamep, 4337 extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep); 4338 4339 create_flag = meta_check_devicesize(mp->c.un_total_blocks); 4340 4341 /* if we're not doing anything (metainit -n), return success */ 4342 if (! 
(options & MDCMD_DOIT)) { 4343 rval = 0; /* success */ 4344 goto out; 4345 } 4346 4347 (void) memset(&set_params, 0, sizeof (set_params)); 4348 4349 if (create_flag == MD_CRO_64BIT) { 4350 mp->c.un_revision = MD_64BIT_META_DEV; 4351 set_params.options = MD_CRO_64BIT; 4352 } else { 4353 mp->c.un_revision = MD_32BIT_META_DEV; 4354 set_params.options = MD_CRO_32BIT; 4355 } 4356 4357 if (getenv(META_SP_DEBUG)) { 4358 meta_sp_debug("meta_create_sp: printing unit structure\n"); 4359 meta_sp_printunit(mp); 4360 } 4361 4362 /* 4363 * Check to see if we're trying to create a partition on a mirror. If so 4364 * we may have to enforce an ownership change before writing the 4365 * watermark out. 4366 */ 4367 if (metaismeta(compnp)) { 4368 char *miscname; 4369 4370 miscname = metagetmiscname(compnp, ep); 4371 if (miscname != NULL) 4372 comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0); 4373 else 4374 comp_is_mirror = 0; 4375 } else { 4376 comp_is_mirror = 0; 4377 } 4378 4379 /* 4380 * For a multi-node environment we have to ensure that the master 4381 * node owns an underlying mirror before we issue the MD_IOCSET ioctl. 4382 * If the master does not own the device we will deadlock as the 4383 * implicit write of the watermarks (in sp_ioctl.c) will cause an 4384 * ownership change that will block as the MD_IOCSET is still in 4385 * progress. To close this window we force an owner change to occur 4386 * before issuing the MD_IOCSET. We cannot simply open the device and 4387 * write to it as this will only work for the first soft-partition 4388 * creation. 
4389 */ 4390 4391 if (comp_is_mirror && !metaislocalset(sp)) { 4392 4393 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 4394 rval = -1; 4395 goto out; 4396 } 4397 if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { 4398 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 4399 if (mm == NULL) { 4400 rval = -1; 4401 goto out; 4402 } else { 4403 rval = meta_mn_change_owner(&ownpar, sp->setno, 4404 meta_getminor(compnp->dev), 4405 sd->sd_mn_mynode->nd_nodeid, 4406 MD_MN_MM_PREVENT_CHANGE | 4407 MD_MN_MM_SPAWN_THREAD); 4408 if (rval == -1) 4409 goto out; 4410 } 4411 } 4412 } 4413 4414 set_params.mnum = MD_SID(mp); 4415 set_params.size = mp->c.un_size; 4416 set_params.mdp = (uintptr_t)mp; 4417 MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum)); 4418 4419 /* first phase of commit. */ 4420 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 4421 np->cname) != 0) { 4422 (void) mdstealerror(ep, &set_params.mde); 4423 rval = -1; 4424 goto out; 4425 } 4426 4427 /* we've successfully committed the record */ 4428 committed = 1; 4429 4430 /* write watermarks */ 4431 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 4432 rval = -1; 4433 goto out; 4434 } 4435 4436 /* 4437 * Allow mirror ownership to change. If we don't succeed in this 4438 * ioctl it isn't fatal, but the cluster will probably hang fairly 4439 * soon as the mirror owner won't change. 
However, we have 4440 * successfully written the watermarks out to the device so the 4441 * softpart creation has succeeded 4442 */ 4443 if (ownpar) { 4444 (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum, 4445 ownpar->d.owner, 4446 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 4447 } 4448 4449 /* second phase of commit, set status to MD_SP_OK */ 4450 if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) { 4451 rval = -1; 4452 goto out; 4453 } 4454 rval = 0; 4455 out: 4456 Free(mp); 4457 if (ownpar) 4458 Free(ownpar); 4459 4460 if (extlist != NULL) 4461 meta_sp_list_free(&extlist); 4462 4463 if (rval != 0 && keynlp != NULL && committed != 1) 4464 (void) del_key_names(sp, keynlp, NULL); 4465 4466 metafreenamelist(keynlp); 4467 4468 return (rval); 4469 } 4470 4471 /* 4472 * ************************************************************************** 4473 * Reset (metaclear) Functions * 4474 * ************************************************************************** 4475 */ 4476 4477 /* 4478 * FUNCTION: meta_sp_reset_common() 4479 * INPUT: sp - the set name of the device to reset 4480 * np - the name of the device to reset 4481 * msp - the unit structure to reset 4482 * options - metaclear options 4483 * OUTPUT: ep - return error pointer 4484 * RETURNS: int - 0 success, -1 error 4485 * PURPOSE: "resets", or more accurately deletes, the soft partition 4486 * specified. First the state is set to "deleting" and then the 4487 * watermarks are all cleared out. Once the watermarks have been 4488 * updated, the unit structure is deleted from the metadb. 
4489 */ 4490 static int 4491 meta_sp_reset_common( 4492 mdsetname_t *sp, 4493 mdname_t *np, 4494 md_sp_t *msp, 4495 md_sp_reset_t reset_params, 4496 mdcmdopts_t options, 4497 md_error_t *ep 4498 ) 4499 { 4500 char *miscname; 4501 int rval = -1; 4502 int is_open = 0; 4503 4504 /* make sure that nobody owns us */ 4505 if (MD_HAS_PARENT(msp->common.parent)) 4506 return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev), 4507 np->cname)); 4508 4509 /* make sure that the soft partition isn't open */ 4510 if ((is_open = meta_isopen(sp, np, ep, options)) < 0) 4511 return (-1); 4512 else if (is_open) 4513 return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev), 4514 np->cname)); 4515 4516 /* get miscname */ 4517 if ((miscname = metagetmiscname(np, ep)) == NULL) 4518 return (-1); 4519 4520 /* fill in reset params */ 4521 MD_SETDRIVERNAME(&reset_params, miscname, sp->setno); 4522 reset_params.mnum = meta_getminor(np->dev); 4523 reset_params.force = (options & MDCMD_FORCE) ? 1 : 0; 4524 4525 /* 4526 * clear soft partition - phase one. 4527 * place the soft partition into the "delete pending" state. 4528 */ 4529 if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0) 4530 return (-1); 4531 4532 /* 4533 * Now clear the watermarks. If the force flag is specified, 4534 * ignore any errors writing the watermarks and delete the unit 4535 * structure anyway. An error may leave the on-disk format in a 4536 * corrupt state. If force is not specified and we fail here, 4537 * the soft partition will remain in the "delete pending" state. 4538 */ 4539 if ((meta_sp_clear_wm(sp, msp, ep) < 0) && 4540 ((options & MDCMD_FORCE) == 0)) 4541 goto out; 4542 4543 /* 4544 * clear soft partition - phase two. 4545 * the driver removes the soft partition from the metadb and 4546 * zeros out incore version. 
4547 */ 4548 if (metaioctl(MD_IOCRESET, &reset_params, 4549 &reset_params.mde, np->cname) != 0) { 4550 (void) mdstealerror(ep, &reset_params.mde); 4551 goto out; 4552 } 4553 rval = 0; /* success */ 4554 4555 if (options & MDCMD_PRINT) { 4556 (void) printf(dgettext(TEXT_DOMAIN, 4557 "%s: Soft Partition is cleared\n"), 4558 np->cname); 4559 (void) fflush(stdout); 4560 } 4561 4562 /* 4563 * if told to recurse and on a metadevice, then attempt to 4564 * clear the subdevices. Indicate failure if the clear fails. 4565 */ 4566 if ((options & MDCMD_RECURSE) && 4567 (metaismeta(msp->compnamep)) && 4568 (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0)) 4569 rval = -1; 4570 4571 out: 4572 meta_invalidate_name(np); 4573 return (rval); 4574 } 4575 4576 /* 4577 * FUNCTION: meta_sp_reset() 4578 * INPUT: sp - the set name of the device to reset 4579 * np - the name of the device to reset 4580 * options - metaclear options 4581 * OUTPUT: ep - return error pointer 4582 * RETURNS: int - 0 success, -1 error 4583 * PURPOSE: provides the entry point to the rest of libmeta for deleting a 4584 * soft partition. If np is NULL, then soft partitions are 4585 * all deleted at the current level and then recursively deleted. 4586 * Otherwise, if a name is specified either directly or as a 4587 * result of a recursive operation, it deletes only that name. 4588 * Since something sitting under a soft partition may be parented 4589 * to it, we have to reparent that other device to another soft 4590 * partition on the same component if we're deleting the one it's 4591 * parented to. 
4592 */ 4593 int 4594 meta_sp_reset( 4595 mdsetname_t *sp, 4596 mdname_t *np, 4597 mdcmdopts_t options, 4598 md_error_t *ep 4599 ) 4600 { 4601 md_sp_t *msp; 4602 int rval = -1; 4603 mdnamelist_t *spnlp = NULL, *nlp = NULL; 4604 md_sp_reset_t reset_params; 4605 int num_sp; 4606 4607 assert(sp != NULL); 4608 4609 /* reset/delete all soft paritions */ 4610 if (np == NULL) { 4611 /* 4612 * meta_reset_all sets MDCMD_RECURSE, but this behavior 4613 * is incorrect for soft partitions. We want to clear 4614 * all soft partitions at a particular level in the 4615 * metadevice stack before moving to the next level. 4616 * Thus, we clear MDCMD_RECURSE from the options. 4617 */ 4618 options &= ~MDCMD_RECURSE; 4619 4620 /* for each soft partition */ 4621 rval = 0; 4622 if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) 4623 rval = -1; 4624 4625 for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) { 4626 np = nlp->namep; 4627 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4628 rval = -1; 4629 break; 4630 } 4631 /* 4632 * meta_reset_all calls us twice to get soft 4633 * partitions at the top and bottom of the stack. 4634 * thus, if we have a parent, we'll get deleted 4635 * on the next call. 4636 */ 4637 if (MD_HAS_PARENT(msp->common.parent)) 4638 continue; 4639 /* 4640 * If this is a multi-node set, we send a series 4641 * of individual metaclear commands. 
4642 */ 4643 if (meta_is_mn_set(sp, ep)) { 4644 if (meta_mn_send_metaclear_command(sp, 4645 np->cname, options, 0, ep) != 0) { 4646 rval = -1; 4647 break; 4648 } 4649 } else { 4650 if (meta_sp_reset(sp, np, options, ep) != 0) { 4651 rval = -1; 4652 break; 4653 } 4654 } 4655 } 4656 /* cleanup return status */ 4657 metafreenamelist(spnlp); 4658 return (rval); 4659 } 4660 4661 /* check the name */ 4662 if (metachkmeta(np, ep) != 0) 4663 return (-1); 4664 4665 /* get the unit structure */ 4666 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4667 return (-1); 4668 4669 /* clear out reset parameters */ 4670 (void) memset(&reset_params, 0, sizeof (reset_params)); 4671 4672 /* if our child is a metadevice, we need to deparent/reparent it */ 4673 if (metaismeta(msp->compnamep)) { 4674 /* get sp's on this component */ 4675 if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep, 4676 &spnlp, 1, ep)) <= 0) 4677 /* no sp's on this device. error! */ 4678 return (-1); 4679 else if (num_sp == 1) 4680 /* last sp on this device, so we deparent */ 4681 reset_params.new_parent = MD_NO_PARENT; 4682 else { 4683 /* have to reparent this metadevice */ 4684 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4685 if (meta_getminor(nlp->namep->dev) == 4686 meta_getminor(np->dev)) 4687 continue; 4688 /* 4689 * this isn't the softpart we are deleting, 4690 * so use this device as the new parent. 
4691 */ 4692 reset_params.new_parent = 4693 meta_getminor(nlp->namep->dev); 4694 break; 4695 } 4696 } 4697 metafreenamelist(spnlp); 4698 } 4699 4700 if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0) 4701 return (-1); 4702 4703 return (0); 4704 } 4705 4706 /* 4707 * FUNCTION: meta_sp_reset_component() 4708 * INPUT: sp - the set name of the device to reset 4709 * name - the string name of the device to reset 4710 * options - metaclear options 4711 * OUTPUT: ep - return error pointer 4712 * RETURNS: int - 0 success, -1 error 4713 * PURPOSE: provides the ability to delete all soft partitions on a 4714 * specified device (metaclear -p). It first gets all of the 4715 * soft partitions on the component and then deletes each one 4716 * individually. 4717 */ 4718 int 4719 meta_sp_reset_component( 4720 mdsetname_t *sp, 4721 char *name, 4722 mdcmdopts_t options, 4723 md_error_t *ep 4724 ) 4725 { 4726 mdname_t *compnp, *np; 4727 mdnamelist_t *spnlp = NULL; 4728 mdnamelist_t *nlp = NULL; 4729 md_sp_t *msp; 4730 int count; 4731 md_sp_reset_t reset_params; 4732 4733 if ((compnp = metaname(&sp, name, ep)) == NULL) 4734 return (-1); 4735 4736 /* If we're starting out with no soft partitions, it's an error */ 4737 count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep); 4738 if (count == 0) 4739 return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname)); 4740 else if (count < 0) 4741 return (-1); 4742 4743 /* 4744 * clear all soft partitions on this component. 4745 * NOTE: we reparent underlying metadevices as we go so that 4746 * things stay sane. Also, if we encounter an error, we stop 4747 * and go no further in case recovery might be needed. 
4748 */ 4749 for (nlp = spnlp; nlp != NULL; nlp = nlp->next) { 4750 /* clear out reset parameters */ 4751 (void) memset(&reset_params, 0, sizeof (reset_params)); 4752 4753 /* check the name */ 4754 np = nlp->namep; 4755 4756 if (metachkmeta(np, ep) != 0) { 4757 metafreenamelist(spnlp); 4758 return (-1); 4759 } 4760 4761 /* get the unit structure */ 4762 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 4763 metafreenamelist(spnlp); 4764 return (-1); 4765 } 4766 4767 /* have to deparent/reparent metadevices */ 4768 if (metaismeta(compnp)) { 4769 if (nlp->next == NULL) 4770 reset_params.new_parent = MD_NO_PARENT; 4771 else 4772 reset_params.new_parent = 4773 meta_getminor(spnlp->next->namep->dev); 4774 } 4775 4776 /* clear soft partition */ 4777 if (meta_sp_reset_common(sp, np, msp, reset_params, 4778 options, ep) < 0) { 4779 metafreenamelist(spnlp); 4780 return (-1); 4781 } 4782 } 4783 metafreenamelist(spnlp); 4784 return (0); 4785 } 4786 4787 /* 4788 * ************************************************************************** 4789 * Grow (metattach) Functions * 4790 * ************************************************************************** 4791 */ 4792 4793 /* 4794 * FUNCTION: meta_sp_attach() 4795 * INPUT: sp - the set name of the device to attach to 4796 * np - the name of the device to attach to 4797 * addsize - the unparsed string holding the amount of space to add 4798 * options - metattach options 4799 * alignment - data alignment 4800 * OUTPUT: ep - return error pointer 4801 * RETURNS: int - 0 success, -1 error 4802 * PURPOSE: grows a soft partition by reading in the existing unit 4803 * structure and setting its state to Growing, allocating more 4804 * space (similar to meta_create_sp()), updating the watermarks, 4805 * and then writing out the new unit structure in the Okay state. 
4806 */ 4807 int 4808 meta_sp_attach( 4809 mdsetname_t *sp, 4810 mdname_t *np, 4811 char *addsize, 4812 mdcmdopts_t options, 4813 sp_ext_length_t alignment, 4814 md_error_t *ep 4815 ) 4816 { 4817 md_grow_params_t grow_params; 4818 sp_ext_length_t grow_len; /* amount to grow */ 4819 mp_unit_t *mp, *new_un; 4820 mdname_t *compnp = NULL; 4821 4822 sp_ext_node_t *extlist = NULL; 4823 int numexts; 4824 mdnamelist_t *spnlp = NULL; 4825 int count; 4826 md_sp_t *msp; 4827 daddr_t start_block; 4828 4829 /* should have the same set */ 4830 assert(sp != NULL); 4831 assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev))); 4832 4833 /* check name */ 4834 if (metachkmeta(np, ep) != 0) 4835 return (-1); 4836 4837 if (meta_sp_parsesize(addsize, &grow_len) == -1) { 4838 return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname)); 4839 } 4840 4841 if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL) 4842 return (-1); 4843 4844 /* make sure we don't have a parent */ 4845 if (MD_HAS_PARENT(mp->c.un_parent)) { 4846 Free(mp); 4847 return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname)); 4848 } 4849 4850 if (getenv(META_SP_DEBUG)) { 4851 meta_sp_debug("meta_sp_attach: Unit structure before new " 4852 "space:\n"); 4853 meta_sp_printunit(mp); 4854 } 4855 4856 /* 4857 * NOTE: the fast option to metakeyname is 0 as opposed to 1 4858 * If this was not the case we would suffer the following 4859 * assertion failure: 4860 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP 4861 * file meta_check.x, line 315 4862 * I guess this is because we have not "seen" this drive before 4863 * and hence hit the failure - this is of course the attach routine 4864 */ 4865 if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) { 4866 Free(mp); 4867 return (-1); 4868 } 4869 4870 /* metakeyname does not fill in the key. 
*/ 4871 compnp->key = mp->un_key; 4872 4873 /* work out the space on the component that we are dealing with */ 4874 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 4875 4876 /* 4877 * see if the component has been soft partitioned yet, or if an 4878 * error occurred. 4879 */ 4880 if (count == 0) { 4881 Free(mp); 4882 return (mdmderror(ep, MDE_NOT_SP, 0, np->cname)); 4883 } else if (count < 0) { 4884 Free(mp); 4885 return (-1); 4886 } 4887 4888 /* 4889 * seed extlist with reserved space at the beginning of the volume and 4890 * enough space for the end watermark. The end watermark always gets 4891 * updated, but if the underlying device changes size it may not be 4892 * pointed to until the extent before it is updated. Since the 4893 * end of the reserved space is where the first watermark starts, 4894 * the reserved extent should never be marked for updating. 4895 */ 4896 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 4897 MD_DISKADDR_ERROR) { 4898 Free(mp); 4899 return (-1); 4900 } 4901 4902 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 4903 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 4904 meta_sp_list_insert(NULL, NULL, &extlist, 4905 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 4906 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 4907 4908 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 4909 Free(mp); 4910 return (-1); 4911 } 4912 4913 metafreenamelist(spnlp); 4914 4915 if (getenv(META_SP_DEBUG)) { 4916 meta_sp_debug("meta_sp_attach: list of used extents:\n"); 4917 meta_sp_list_dump(extlist); 4918 } 4919 4920 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 4921 4922 assert(mp->un_numexts >= 1); 4923 numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len, 4924 mp->un_ext[mp->un_numexts - 1].un_poff, 4925 (alignment > 0) ? 
alignment : 4926 meta_sp_get_default_alignment(sp, compnp, ep)); 4927 4928 if (numexts == -1) { 4929 Free(mp); 4930 return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname)); 4931 } 4932 4933 /* allocate new unit structure and copy in old unit */ 4934 if ((new_un = meta_sp_updateunit(np, mp, extlist, 4935 grow_len, numexts, ep)) == NULL) { 4936 Free(mp); 4937 return (-1); 4938 } 4939 Free(mp); 4940 4941 /* If running in dryrun mode (-n option), we're done here */ 4942 if ((options & MDCMD_DOIT) == 0) { 4943 if (options & MDCMD_PRINT) { 4944 (void) printf(dgettext(TEXT_DOMAIN, 4945 "%s: Soft Partition would grow\n"), 4946 np->cname); 4947 (void) fflush(stdout); 4948 } 4949 return (0); 4950 } 4951 4952 if (getenv(META_SP_DEBUG)) { 4953 meta_sp_debug("meta_sp_attach: updated unit structure:\n"); 4954 meta_sp_printunit(new_un); 4955 } 4956 4957 assert(new_un != NULL); 4958 4959 (void) memset(&grow_params, 0, sizeof (grow_params)); 4960 if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { 4961 grow_params.options = MD_CRO_64BIT; 4962 new_un->c.un_revision = MD_64BIT_META_DEV; 4963 } else { 4964 grow_params.options = MD_CRO_32BIT; 4965 new_un->c.un_revision = MD_32BIT_META_DEV; 4966 } 4967 grow_params.mnum = MD_SID(new_un); 4968 grow_params.size = new_un->c.un_size; 4969 grow_params.mdp = (uintptr_t)new_un; 4970 MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum)); 4971 4972 if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde, 4973 np->cname) != 0) { 4974 (void) mdstealerror(ep, &grow_params.mde); 4975 return (-1); 4976 } 4977 4978 /* update all watermarks */ 4979 4980 if ((msp = meta_get_sp(sp, np, ep)) == NULL) 4981 return (-1); 4982 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) 4983 return (-1); 4984 4985 4986 /* second phase of commit, set status to MD_SP_OK */ 4987 if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0) 4988 return (-1); 4989 4990 meta_invalidate_name(np); 4991 4992 if (options & MDCMD_PRINT) { 4993 (void) 
printf(dgettext(TEXT_DOMAIN, 4994 "%s: Soft Partition has been grown\n"), 4995 np->cname); 4996 (void) fflush(stdout); 4997 } 4998 4999 return (0); 5000 } 5001 5002 /* 5003 * ************************************************************************** 5004 * Recovery (metarecover) Functions * 5005 * ************************************************************************** 5006 */ 5007 5008 /* 5009 * FUNCTION: meta_recover_sp() 5010 * INPUT: sp - the name of the set we are recovering on 5011 * compnp - name pointer for device we are recovering on 5012 * argc - argument count 5013 * argv - left over arguments not parsed by metarecover command 5014 * options - metarecover options 5015 * OUTPUT: ep - return error pointer 5016 * RETURNS: int - 0 - success, -1 - error 5017 * PURPOSE: parse soft partitioning-specific metarecover options and 5018 * dispatch to the appropriate function to handle recovery. 5019 */ 5020 int 5021 meta_recover_sp( 5022 mdsetname_t *sp, 5023 mdname_t *compnp, 5024 int argc, 5025 char *argv[], 5026 mdcmdopts_t options, 5027 md_error_t *ep 5028 ) 5029 { 5030 md_set_desc *sd; 5031 5032 if (argc > 1) { 5033 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5034 argc, argv); 5035 return (-1); 5036 } 5037 5038 /* 5039 * For a MN set, this operation must be performed on the master 5040 * as it is responsible for maintaining the watermarks 5041 */ 5042 if (!metaislocalset(sp)) { 5043 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5044 return (-1); 5045 if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) { 5046 (void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno, 5047 sd->sd_mn_master_nodenm, NULL, NULL); 5048 return (-1); 5049 } 5050 } 5051 if (argc == 0) { 5052 /* 5053 * if no additional arguments are passed, metarecover should 5054 * validate both on-disk and metadb structures as well as 5055 * checking that both are consistent with each other 5056 */ 5057 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5058 return (-1); 5059 if 
(meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5060 return (-1); 5061 if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0) 5062 return (-1); 5063 } else if (strcmp(argv[0], "-d") == 0) { 5064 /* 5065 * Ensure that there is no existing valid record for this 5066 * soft-partition. If there is we have nothing to do. 5067 */ 5068 if (meta_sp_validate_unit(sp, compnp, options, ep) == 0) 5069 return (-1); 5070 /* validate and recover from on-disk structures */ 5071 if (meta_sp_validate_wm(sp, compnp, options, ep) < 0) 5072 return (-1); 5073 if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0) 5074 return (-1); 5075 } else if (strcmp(argv[0], "-m") == 0) { 5076 /* validate and recover from metadb structures */ 5077 if (meta_sp_validate_unit(sp, compnp, options, ep) < 0) 5078 return (-1); 5079 if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0) 5080 return (-1); 5081 } else { 5082 /* syntax error */ 5083 (void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, 5084 argc, argv); 5085 return (-1); 5086 } 5087 5088 return (0); 5089 } 5090 5091 /* 5092 * FUNCTION: meta_sp_display_exthdr() 5093 * INPUT: none 5094 * OUTPUT: none 5095 * RETURNS: void 5096 * PURPOSE: print header line for sp_ext_node_t information. to be used 5097 * in conjunction with meta_sp_display_ext(). 5098 */ 5099 static void 5100 meta_sp_display_exthdr(void) 5101 { 5102 (void) printf("%20s %5s %7s %20s %20s\n", 5103 dgettext(TEXT_DOMAIN, "Name"), 5104 dgettext(TEXT_DOMAIN, "Seq#"), 5105 dgettext(TEXT_DOMAIN, "Type"), 5106 dgettext(TEXT_DOMAIN, "Offset"), 5107 dgettext(TEXT_DOMAIN, "Length")); 5108 } 5109 5110 5111 /* 5112 * FUNCTION: meta_sp_display_ext() 5113 * INPUT: ext - extent to display 5114 * OUTPUT: none 5115 * RETURNS: void 5116 * PURPOSE: print selected fields from sp_ext_node_t. 
5117 */ 5118 static void 5119 meta_sp_display_ext(sp_ext_node_t *ext) 5120 { 5121 /* print extent information */ 5122 if (ext->ext_namep != NULL) 5123 (void) printf("%20s ", ext->ext_namep->cname); 5124 else 5125 (void) printf("%20s ", "NONE"); 5126 5127 (void) printf("%5u ", ext->ext_seq); 5128 5129 switch (ext->ext_type) { 5130 case EXTTYP_ALLOC: 5131 (void) printf("%7s ", "ALLOC"); 5132 break; 5133 case EXTTYP_FREE: 5134 (void) printf("%7s ", "FREE"); 5135 break; 5136 case EXTTYP_RESERVED: 5137 (void) printf("%7s ", "RESV"); 5138 break; 5139 case EXTTYP_END: 5140 (void) printf("%7s ", "END"); 5141 break; 5142 default: 5143 (void) printf("%7s ", "INVLD"); 5144 break; 5145 } 5146 5147 (void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length); 5148 } 5149 5150 5151 /* 5152 * FUNCTION: meta_sp_checkseq() 5153 * INPUT: extlist - list of extents to be checked 5154 * OUTPUT: none 5155 * RETURNS: int - 0 - success, -1 - error 5156 * PURPOSE: check soft partition sequence numbers. this function assumes 5157 * that a list of extents representing 1 or more soft partitions 5158 * is passed in sorted in sequence number order. within a 5159 * single soft partition, there may not be any missing or 5160 * duplicate sequence numbers. 
5161 */ 5162 static int 5163 meta_sp_checkseq(sp_ext_node_t *extlist) 5164 { 5165 sp_ext_node_t *ext; 5166 5167 assert(extlist != NULL); 5168 5169 for (ext = extlist; 5170 ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC; 5171 ext = ext->ext_next) { 5172 if (ext->ext_next->ext_namep != NULL && 5173 strcmp(ext->ext_next->ext_namep->cname, 5174 ext->ext_namep->cname) != 0) 5175 continue; 5176 5177 if (ext->ext_next->ext_seq != ext->ext_seq + 1) { 5178 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5179 "%s: sequence numbers are " 5180 "incorrect: %d should be %d\n"), 5181 ext->ext_next->ext_namep->cname, 5182 ext->ext_next->ext_seq, ext->ext_seq + 1); 5183 return (-1); 5184 } 5185 } 5186 return (0); 5187 } 5188 5189 5190 /* 5191 * FUNCTION: meta_sp_resolve_name_conflict() 5192 * INPUT: sp - name of set we're are recovering in. 5193 * old_np - name pointer of soft partition we found on disk. 5194 * OUTPUT: new_np - name pointer for new soft partition name. 5195 * ep - error pointer returned. 5196 * RETURNS: int - 0 - name not replace, 1 - name replaced, -1 - error 5197 * PURPOSE: Check to see if the name of one of the soft partitions we found 5198 * on disk already exists in the metadb. If so, prompt for a new 5199 * name. In addition, we keep a static array of names that 5200 * will be recovered from this device since these names don't 5201 * exist in the configuration at this point but cannot be 5202 * recovered more than once. 
5203 */ 5204 static int 5205 meta_sp_resolve_name_conflict( 5206 mdsetname_t *sp, 5207 mdname_t *old_np, 5208 mdname_t **new_np, 5209 md_error_t *ep 5210 ) 5211 { 5212 char yesno[255]; 5213 char *yes; 5214 char newname[MD_SP_MAX_DEVNAME_PLUS_1]; 5215 int nunits; 5216 static int *used_names = NULL; 5217 5218 assert(old_np != NULL); 5219 5220 if (used_names == NULL) { 5221 if ((nunits = meta_get_nunits(ep)) < 0) 5222 return (-1); 5223 used_names = Zalloc(nunits * sizeof (int)); 5224 } 5225 5226 /* see if it exists already */ 5227 if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 && 5228 metagetmiscname(old_np, ep) == NULL) { 5229 if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5230 return (-1); 5231 else { 5232 used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1; 5233 mdclrerror(ep); 5234 return (0); 5235 } 5236 } 5237 5238 /* name exists, ask the user for a new one */ 5239 (void) printf(dgettext(TEXT_DOMAIN, 5240 "WARNING: A soft partition named %s was found in the extent\n" 5241 "headers, but this name already exists in the metadb " 5242 "configuration.\n" 5243 "In order to continue recovery you must supply\n" 5244 "a new name for this soft partition.\n"), old_np->cname); 5245 (void) printf(dgettext(TEXT_DOMAIN, 5246 "Would you like to continue and supply a new name? 
(yes/no) ")); 5247 5248 (void) fflush(stdout); 5249 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 5250 (strlen(yesno) == 1)) 5251 (void) snprintf(yesno, sizeof (yesno), "%s\n", 5252 dgettext(TEXT_DOMAIN, "no")); 5253 yes = dgettext(TEXT_DOMAIN, "yes"); 5254 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 5255 return (-1); 5256 } 5257 5258 (void) fflush(stdin); 5259 5260 /* get the new name */ 5261 for (;;) { 5262 (void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name " 5263 "for this soft partition (dXXXX) ")); 5264 (void) fflush(stdout); 5265 if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL) 5266 (void) strcpy(newname, ""); 5267 5268 /* remove newline character */ 5269 if (newname[strlen(newname) - 1] == '\n') 5270 newname[strlen(newname) - 1] = '\0'; 5271 5272 if (!(is_metaname(newname)) || 5273 (meta_init_make_device(&sp, newname, ep) != 0)) { 5274 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5275 "Invalid metadevice name\n")); 5276 (void) fflush(stderr); 5277 continue; 5278 } 5279 5280 if ((*new_np = metaname(&sp, newname, ep)) == NULL) { 5281 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5282 "Invalid metadevice name\n")); 5283 (void) fflush(stderr); 5284 continue; 5285 } 5286 5287 assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits); 5288 /* make sure the name isn't already being used */ 5289 if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] || 5290 metagetmiscname(*new_np, ep) != NULL) { 5291 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5292 "That name already exists\n")); 5293 continue; 5294 } else if (! 
mdismderror(ep, MDE_UNIT_NOT_SETUP)) 5295 return (-1); 5296 5297 break; 5298 } 5299 5300 /* got a new name, place in used array and return */ 5301 used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1; 5302 mdclrerror(ep); 5303 return (1); 5304 } 5305 5306 /* 5307 * FUNCTION: meta_sp_validate_wm() 5308 * INPUT: sp - set name we are recovering in 5309 * compnp - name pointer for device we are recovering from 5310 * options - metarecover options 5311 * OUTPUT: ep - error pointer returned 5312 * RETURNS: int - 0 - success, -1 - error 5313 * PURPOSE: validate and display watermark configuration. walk the 5314 * on-disk watermark structures and validate the information 5315 * found within. since a watermark configuration is 5316 * "self-defining", the act of traversing the watermarks 5317 * is part of the validation process. 5318 */ 5319 static int 5320 meta_sp_validate_wm( 5321 mdsetname_t *sp, 5322 mdname_t *compnp, 5323 mdcmdopts_t options, 5324 md_error_t *ep 5325 ) 5326 { 5327 sp_ext_node_t *extlist = NULL; 5328 sp_ext_node_t *ext; 5329 int num_sps = 0; 5330 int rval; 5331 5332 if ((options & MDCMD_VERBOSE) != 0) 5333 (void) printf(dgettext(TEXT_DOMAIN, 5334 "Verifying on-disk structures on %s.\n"), 5335 compnp->cname); 5336 5337 /* 5338 * for each watermark, build an ext_node, place on list. 
5339 */ 5340 rval = meta_sp_extlist_from_wm(sp, compnp, &extlist, 5341 meta_sp_cmp_by_nameseq, ep); 5342 5343 if ((options & MDCMD_VERBOSE) != 0) { 5344 /* print out what we found */ 5345 if (extlist == NULL) 5346 (void) printf(dgettext(TEXT_DOMAIN, 5347 "No extent headers found on %s.\n"), 5348 compnp->cname); 5349 else { 5350 (void) printf(dgettext(TEXT_DOMAIN, 5351 "The following extent headers were found on %s.\n"), 5352 compnp->cname); 5353 meta_sp_display_exthdr(); 5354 } 5355 for (ext = extlist; ext != NULL; ext = ext->ext_next) 5356 meta_sp_display_ext(ext); 5357 } 5358 5359 if (rval < 0) { 5360 (void) printf(dgettext(TEXT_DOMAIN, 5361 "%s: On-disk structures invalid or " 5362 "no soft partitions found.\n"), 5363 compnp->cname); 5364 return (-1); 5365 } 5366 5367 assert(extlist != NULL); 5368 5369 /* count number of soft partitions */ 5370 for (ext = extlist; 5371 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5372 ext = ext->ext_next) { 5373 if (ext->ext_next != NULL && 5374 ext->ext_next->ext_namep != NULL && 5375 strcmp(ext->ext_next->ext_namep->cname, 5376 ext->ext_namep->cname) == 0) 5377 continue; 5378 num_sps++; 5379 } 5380 5381 if ((options & MDCMD_VERBOSE) != 0) 5382 (void) printf(dgettext(TEXT_DOMAIN, 5383 "Found %d soft partition(s) on %s.\n"), num_sps, 5384 compnp->cname); 5385 5386 if (num_sps == 0) { 5387 (void) printf(dgettext(TEXT_DOMAIN, 5388 "%s: No soft partitions.\n"), compnp->cname); 5389 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5390 } 5391 5392 /* check sequence numbers */ 5393 if ((options & MDCMD_VERBOSE) != 0) 5394 (void) printf(dgettext(TEXT_DOMAIN, 5395 "Checking sequence numbers.\n")); 5396 5397 if (meta_sp_checkseq(extlist) != 0) 5398 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5399 5400 return (0); 5401 } 5402 5403 /* 5404 * FUNCTION: meta_sp_validate_unit() 5405 * INPUT: sp - name of set we are recovering in 5406 * compnp - name of component we are recovering from 5407 * options - 
metarecover options 5408 * OUTPUT: ep - error pointer returned 5409 * RETURNS: int - 0 - success, -1 - error 5410 * PURPOSE: validate and display metadb configuration. begin by getting 5411 * all soft partitions built on the specified component. get 5412 * the unit structure for each one and validate the fields within. 5413 */ 5414 static int 5415 meta_sp_validate_unit( 5416 mdsetname_t *sp, 5417 mdname_t *compnp, 5418 mdcmdopts_t options, 5419 md_error_t *ep 5420 ) 5421 { 5422 md_sp_t *msp; 5423 mdnamelist_t *spnlp = NULL; 5424 mdnamelist_t *namep = NULL; 5425 int count; 5426 uint_t extn; 5427 sp_ext_length_t size; 5428 5429 if ((options & MDCMD_VERBOSE) != 0) 5430 (void) printf(dgettext(TEXT_DOMAIN, 5431 "%s: Validating soft partition metadb entries.\n"), 5432 compnp->cname); 5433 5434 if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) 5435 return (-1); 5436 5437 /* get all soft partitions on component */ 5438 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 5439 5440 if (count == 0) { 5441 (void) printf(dgettext(TEXT_DOMAIN, 5442 "%s: No soft partitions.\n"), compnp->cname); 5443 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5444 } else if (count < 0) { 5445 return (-1); 5446 } 5447 5448 /* Now go through the soft partitions and check each one */ 5449 for (namep = spnlp; namep != NULL; namep = namep->next) { 5450 mdname_t *curnp = namep->namep; 5451 sp_ext_offset_t curvoff; 5452 5453 /* get the unit structure */ 5454 if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL) 5455 return (-1); 5456 5457 /* verify generic unit structure parameters */ 5458 if ((options & MDCMD_VERBOSE) != 0) 5459 (void) printf(dgettext(TEXT_DOMAIN, 5460 "\nVerifying device %s.\n"), 5461 curnp->cname); 5462 5463 /* 5464 * MD_SP_LAST is an invalid state and is always the 5465 * highest numbered. 
5466 */ 5467 if (msp->status >= MD_SP_LAST) { 5468 (void) printf(dgettext(TEXT_DOMAIN, 5469 "%s: status value %u is out of range.\n"), 5470 curnp->cname, msp->status); 5471 return (mdmderror(ep, MDE_RECOVER_FAILED, 5472 0, curnp->cname)); 5473 } else if ((options & MDCMD_VERBOSE) != 0) { 5474 uint_t tstate = 0; 5475 5476 if (metaismeta(msp->compnamep)) { 5477 if (meta_get_tstate(msp->common.namep->dev, 5478 &tstate, ep) != 0) 5479 return (-1); 5480 } 5481 (void) printf(dgettext(TEXT_DOMAIN, 5482 "%s: Status \"%s\" is valid.\n"), 5483 curnp->cname, meta_sp_status_to_name(msp->status, 5484 tstate & MD_DEV_ERRORED)); 5485 } 5486 5487 /* Now verify each extent */ 5488 if ((options & MDCMD_VERBOSE) != 0) 5489 (void) printf("%14s %21s %21s %21s\n", 5490 dgettext(TEXT_DOMAIN, "Extent Number"), 5491 dgettext(TEXT_DOMAIN, "Virtual Offset"), 5492 dgettext(TEXT_DOMAIN, "Physical Offset"), 5493 dgettext(TEXT_DOMAIN, "Length")); 5494 5495 curvoff = 0ULL; 5496 for (extn = 0; extn < msp->ext.ext_len; extn++) { 5497 md_sp_ext_t *extp = &msp->ext.ext_val[extn]; 5498 5499 if ((options & MDCMD_VERBOSE) != 0) 5500 (void) printf("%14u %21llu %21llu %21llu\n", 5501 extn, extp->voff, extp->poff, extp->len); 5502 5503 if (extp->voff != curvoff) { 5504 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5505 "%s: virtual offset for extent %u " 5506 "is inconsistent, expected %llu, " 5507 "got %llu.\n"), curnp->cname, extn, 5508 curvoff, extp->voff); 5509 return (mdmderror(ep, MDE_RECOVER_FAILED, 5510 0, compnp->cname)); 5511 } 5512 5513 /* make sure extent does not drop off the end */ 5514 if ((extp->poff + extp->len) == size) { 5515 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5516 "%s: extent %u at offset %llu, " 5517 "length %llu exceeds the size of the " 5518 "device, %llu.\n"), curnp->cname, 5519 extn, extp->poff, extp->len, size); 5520 return (mdmderror(ep, MDE_RECOVER_FAILED, 5521 0, compnp->cname)); 5522 } 5523 5524 curvoff += extp->len; 5525 } 5526 } 5527 if (options & MDCMD_PRINT) { 
5528 (void) printf(dgettext(TEXT_DOMAIN, 5529 "%s: Soft Partition metadb configuration is valid\n"), 5530 compnp->cname); 5531 } 5532 return (0); 5533 } 5534 5535 /* 5536 * FUNCTION: meta_sp_validate_wm_and_unit() 5537 * INPUT: sp - name of set we are recovering in 5538 * compnp - name of device we are recovering from 5539 * options - metarecover options 5540 * OUTPUT: ep - error pointer returned 5541 * RETURNS: int - 0 - success, -1 error 5542 * PURPOSE: cross-validate and display watermarks and metadb records. 5543 * get both the unit structures for the soft partitions built 5544 * on the specified component and the watermarks found on that 5545 * component and check to make sure they are consistent with 5546 * each other. 5547 */ 5548 static int 5549 meta_sp_validate_wm_and_unit( 5550 mdsetname_t *sp, 5551 mdname_t *np, 5552 mdcmdopts_t options, 5553 md_error_t *ep 5554 ) 5555 { 5556 sp_ext_node_t *wmlist = NULL; 5557 sp_ext_node_t *unitlist = NULL; 5558 sp_ext_node_t *unitext; 5559 sp_ext_node_t *wmext; 5560 sp_ext_offset_t tmpunitoff; 5561 mdnamelist_t *spnlp = NULL; 5562 int count; 5563 int rval = 0; 5564 int verbose = (options & MDCMD_VERBOSE); 5565 5566 /* get unit structure list */ 5567 count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep); 5568 if (count <= 0) 5569 return (-1); 5570 5571 meta_sp_list_insert(NULL, NULL, &unitlist, 5572 metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 5573 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 5574 5575 if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) { 5576 metafreenamelist(spnlp); 5577 return (-1); 5578 } 5579 5580 metafreenamelist(spnlp); 5581 5582 meta_sp_list_freefill(&unitlist, metagetsize(np, ep)); 5583 5584 if (meta_sp_extlist_from_wm(sp, np, &wmlist, 5585 meta_sp_cmp_by_offset, ep) < 0) { 5586 meta_sp_list_free(&unitlist); 5587 return (-1); 5588 } 5589 5590 if (getenv(META_SP_DEBUG)) { 5591 meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n"); 5592 
meta_sp_list_dump(unitlist); 5593 meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n"); 5594 meta_sp_list_dump(wmlist); 5595 } 5596 5597 /* 5598 * step through both lists and compare allocated nodes. Free 5599 * nodes and end watermarks may differ between the two but 5600 * that's generally ok, and if they're wrong will typically 5601 * cause misplaced allocated extents. 5602 */ 5603 if (verbose) 5604 (void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb " 5605 "allocations match extent headers.\n"), np->cname); 5606 5607 unitext = unitlist; 5608 wmext = wmlist; 5609 while ((wmext != NULL) && (unitext != NULL)) { 5610 /* find next allocated extents in each list */ 5611 while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC) 5612 wmext = wmext->ext_next; 5613 5614 while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC) 5615 unitext = unitext->ext_next; 5616 5617 if (wmext == NULL || unitext == NULL) 5618 break; 5619 5620 if (verbose) { 5621 (void) printf(dgettext(TEXT_DOMAIN, 5622 "Metadb extent:\n")); 5623 meta_sp_display_exthdr(); 5624 meta_sp_display_ext(unitext); 5625 (void) printf(dgettext(TEXT_DOMAIN, 5626 "Extent header extent:\n")); 5627 meta_sp_display_exthdr(); 5628 meta_sp_display_ext(wmext); 5629 (void) printf("\n"); 5630 } 5631 5632 if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0) 5633 rval = -1; 5634 5635 /* 5636 * if the offsets aren't equal, only increment the 5637 * lowest one in hopes of getting the lists back in sync. 5638 */ 5639 tmpunitoff = unitext->ext_offset; 5640 if (unitext->ext_offset <= wmext->ext_offset) 5641 unitext = unitext->ext_next; 5642 if (wmext->ext_offset <= tmpunitoff) 5643 wmext = wmext->ext_next; 5644 } 5645 5646 /* 5647 * if both lists aren't at the end then there are extra 5648 * allocated nodes in one of them. 
5649 */ 5650 if (wmext != NULL) { 5651 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5652 "%s: extent headers contain allocations not in " 5653 "the metadb\n\n"), np->cname); 5654 rval = -1; 5655 } 5656 5657 if (unitext != NULL) { 5658 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5659 "%s: metadb contains allocations not in the extent " 5660 "headers\n\n"), np->cname); 5661 rval = -1; 5662 } 5663 5664 if (options & MDCMD_PRINT) { 5665 if (rval == 0) { 5666 (void) printf(dgettext(TEXT_DOMAIN, 5667 "%s: Soft Partition metadb matches extent " 5668 "header configuration\n"), np->cname); 5669 } else { 5670 (void) printf(dgettext(TEXT_DOMAIN, 5671 "%s: Soft Partition metadb does not match extent " 5672 "header configuration\n"), np->cname); 5673 } 5674 } 5675 5676 return (rval); 5677 } 5678 5679 /* 5680 * FUNCTION: meta_sp_validate_exts() 5681 * INPUT: compnp - name pointer for device we are recovering from 5682 * wmext - extent node representing watermark 5683 * unitext - extent node from unit structure 5684 * OUTPUT: ep - return error pointer 5685 * RETURNS: int - 0 - succes, mdmderror return code - error 5686 * PURPOSE: Takes two extent nodes and checks them against each other. 5687 * offset, length, sequence number, set, and name are compared. 
5688 */ 5689 static int 5690 meta_sp_validate_exts( 5691 mdname_t *compnp, 5692 sp_ext_node_t *wmext, 5693 sp_ext_node_t *unitext, 5694 md_error_t *ep 5695 ) 5696 { 5697 if (wmext->ext_offset != unitext->ext_offset) { 5698 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5699 "%s: unit structure and extent header offsets differ.\n"), 5700 compnp->cname); 5701 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5702 } 5703 5704 if (wmext->ext_length != unitext->ext_length) { 5705 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5706 "%s: unit structure and extent header lengths differ.\n"), 5707 compnp->cname); 5708 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5709 } 5710 5711 if (wmext->ext_seq != unitext->ext_seq) { 5712 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5713 "%s: unit structure and extent header sequence numbers " 5714 "differ.\n"), compnp->cname); 5715 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5716 } 5717 5718 if (wmext->ext_type != unitext->ext_type) { 5719 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5720 "%s: unit structure and extent header types differ.\n"), 5721 compnp->cname); 5722 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5723 } 5724 5725 /* 5726 * If one has a set pointer and the other doesn't, error. 5727 * If both extents have setnames, then make sure they match 5728 * If both are NULL, it's ok, they match. 
5729 */ 5730 if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) { 5731 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5732 "%s: unit structure and extent header set values " 5733 "differ.\n"), compnp->cname); 5734 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5735 } 5736 5737 if (unitext->ext_setp != NULL) { 5738 if (strcmp(unitext->ext_setp->setname, 5739 wmext->ext_setp->setname) != 0) { 5740 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5741 "%s: unit structure and extent header set names " 5742 "differ.\n"), compnp->cname); 5743 return (mdmderror(ep, MDE_RECOVER_FAILED, 5744 0, compnp->cname)); 5745 } 5746 } 5747 5748 /* 5749 * If one has a name pointer and the other doesn't, error. 5750 * If both extents have names, then make sure they match 5751 * If both are NULL, it's ok, they match. 5752 */ 5753 if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) { 5754 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5755 "%s: unit structure and extent header name values " 5756 "differ.\n"), compnp->cname); 5757 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5758 } 5759 5760 if (unitext->ext_namep != NULL) { 5761 if (strcmp(wmext->ext_namep->cname, 5762 unitext->ext_namep->cname) != 0) { 5763 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5764 "%s: unit structure and extent header names " 5765 "differ.\n"), compnp->cname); 5766 return (mdmderror(ep, MDE_RECOVER_FAILED, 5767 0, compnp->cname)); 5768 } 5769 } 5770 5771 return (0); 5772 } 5773 5774 /* 5775 * FUNCTION: update_sp_status() 5776 * INPUT: sp - name of set we are recovering in 5777 * minors - pointer to an array of soft partition minor numbers 5778 * num_sps - number of minor numbers in array 5779 * status - new status to be applied to all soft parts in array 5780 * mn_set - set if current set is a multi-node set 5781 * OUTPUT: ep - return error pointer 5782 * RETURNS: int - 0 - success, -1 - error 5783 * PURPOSE: update status of soft partitions to new status. 
minors is an 5784 * array of minor numbers to apply the new status to. 5785 * If mn_set is set, a message is sent to all nodes in the 5786 * cluster to update the status locally. 5787 */ 5788 static int 5789 update_sp_status( 5790 mdsetname_t *sp, 5791 minor_t *minors, 5792 int num_sps, 5793 sp_status_t status, 5794 bool_t mn_set, 5795 md_error_t *ep 5796 ) 5797 { 5798 int i; 5799 int err = 0; 5800 5801 if (mn_set) { 5802 md_mn_msg_sp_setstat_t sp_setstat_params; 5803 int result; 5804 md_mn_result_t *resp = NULL; 5805 5806 for (i = 0; i < num_sps; i++) { 5807 sp_setstat_params.sp_setstat_mnum = minors[i]; 5808 sp_setstat_params.sp_setstat_status = status; 5809 5810 result = mdmn_send_message(sp->setno, 5811 MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 5812 (char *)&sp_setstat_params, 5813 sizeof (sp_setstat_params), 5814 &resp, ep); 5815 if (resp != NULL) { 5816 if (resp->mmr_exitval != 0) 5817 err = -1; 5818 free_result(resp); 5819 } 5820 if (result != 0) { 5821 err = -1; 5822 } 5823 } 5824 } else { 5825 if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0) 5826 err = -1; 5827 } 5828 if (err < 0) { 5829 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 5830 "Error updating status on recovered soft " 5831 "partitions.\n")); 5832 } 5833 return (err); 5834 } 5835 5836 /* 5837 * FUNCTION: meta_sp_recover_from_wm() 5838 * INPUT: sp - name of set we are recovering in 5839 * compnp - name pointer for component we are recovering from 5840 * options - metarecover options 5841 * OUTPUT: ep - return error pointer 5842 * RETURNS: int - 0 - success, -1 - error 5843 * PURPOSE: update metadb records to match watermarks. begin by getting 5844 * an extlist representing all soft partitions on the component. 5845 * then build a unit structure for each soft partition. 5846 * notify user of changes, then commit each soft partition to 5847 * the metadb one at a time in the "recovering" state. 
update 5848 * any watermarks that may need it (to reflect possible name 5849 * changes), and, finally, set the status of all recovered 5850 * partitions to the "OK" state at once. 5851 */ 5852 static int 5853 meta_sp_recover_from_wm( 5854 mdsetname_t *sp, 5855 mdname_t *compnp, 5856 mdcmdopts_t options, 5857 md_error_t *ep 5858 ) 5859 { 5860 sp_ext_node_t *extlist = NULL; 5861 sp_ext_node_t *sp_list = NULL; 5862 sp_ext_node_t *update_list = NULL; 5863 sp_ext_node_t *ext; 5864 sp_ext_node_t *sp_ext; 5865 mp_unit_t *mp; 5866 mp_unit_t **un_array; 5867 int numexts = 0, num_sps = 0, i = 0; 5868 int err = 0; 5869 int not_recovered = 0; 5870 int committed = 0; 5871 sp_ext_length_t sp_length = 0LL; 5872 mdnamelist_t *keynlp = NULL; 5873 mdname_t *np; 5874 mdname_t *new_np; 5875 int new_name; 5876 md_set_params_t set_params; 5877 minor_t *minors = NULL; 5878 char yesno[255]; 5879 char *yes; 5880 bool_t mn_set = 0; 5881 md_set_desc *sd; 5882 mm_unit_t *mm; 5883 md_set_mmown_params_t *ownpar = NULL; 5884 int comp_is_mirror = 0; 5885 5886 /* 5887 * if this component appears in another metadevice already, do 5888 * NOT recover from it. 5889 */ 5890 if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0) 5891 return (-1); 5892 5893 /* set flag if dealing with a MN set */ 5894 if (!metaislocalset(sp)) { 5895 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 5896 return (-1); 5897 } 5898 if (MD_MNSET_DESC(sd)) 5899 mn_set = 1; 5900 } 5901 /* 5902 * for each watermark, build an ext_node, place on list. 
5903 */ 5904 if (meta_sp_extlist_from_wm(sp, compnp, &extlist, 5905 meta_sp_cmp_by_nameseq, ep) < 0) 5906 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 5907 5908 assert(extlist != NULL); 5909 5910 /* count number of soft partitions */ 5911 for (ext = extlist; 5912 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5913 ext = ext->ext_next) { 5914 if (ext->ext_next != NULL && 5915 ext->ext_next->ext_namep != NULL && 5916 strcmp(ext->ext_next->ext_namep->cname, 5917 ext->ext_namep->cname) == 0) 5918 continue; 5919 num_sps++; 5920 } 5921 5922 /* allocate array of unit structure pointers */ 5923 un_array = Zalloc(num_sps * sizeof (mp_unit_t *)); 5924 5925 /* 5926 * build unit structures from list of ext_nodes. 5927 */ 5928 for (ext = extlist; 5929 ext != NULL && ext->ext_type == EXTTYP_ALLOC; 5930 ext = ext->ext_next) { 5931 meta_sp_list_insert(ext->ext_setp, ext->ext_namep, 5932 &sp_list, ext->ext_offset, ext->ext_length, 5933 ext->ext_type, ext->ext_seq, ext->ext_flags, 5934 meta_sp_cmp_by_nameseq); 5935 5936 numexts++; 5937 sp_length += ext->ext_length - MD_SP_WMSIZE; 5938 5939 if (ext->ext_next != NULL && 5940 ext->ext_next->ext_namep != NULL && 5941 strcmp(ext->ext_next->ext_namep->cname, 5942 ext->ext_namep->cname) == 0) 5943 continue; 5944 5945 /* 5946 * if we made it here, we are at a soft partition 5947 * boundary in the list. 5948 */ 5949 if (getenv(META_SP_DEBUG)) { 5950 meta_sp_debug("meta_recover_from_wm: dumping wm " 5951 "list:\n"); 5952 meta_sp_list_dump(sp_list); 5953 } 5954 5955 assert(sp_list != NULL); 5956 assert(sp_list->ext_namep != NULL); 5957 5958 if ((new_name = meta_sp_resolve_name_conflict(sp, 5959 sp_list->ext_namep, &new_np, ep)) < 0) { 5960 err = 1; 5961 goto out; 5962 } else if (new_name) { 5963 for (sp_ext = sp_list; 5964 sp_ext != NULL; 5965 sp_ext = sp_ext->ext_next) { 5966 /* 5967 * insert into the update list for 5968 * watermark update. 
5969 */ 5970 meta_sp_list_insert(sp_ext->ext_setp, 5971 new_np, &update_list, sp_ext->ext_offset, 5972 sp_ext->ext_length, sp_ext->ext_type, 5973 sp_ext->ext_seq, EXTFLG_UPDATE, 5974 meta_sp_cmp_by_offset); 5975 } 5976 5977 } 5978 if (options & MDCMD_DOIT) { 5979 /* store name in namespace */ 5980 if (mn_set) { 5981 /* send message to all nodes to return key */ 5982 md_mn_msg_addkeyname_t *send_params; 5983 int result; 5984 md_mn_result_t *resp = NULL; 5985 int message_size; 5986 5987 message_size = sizeof (*send_params) + 5988 strlen(compnp->cname) + 1; 5989 send_params = Zalloc(message_size); 5990 send_params->addkeyname_setno = sp->setno; 5991 (void) strcpy(&send_params->addkeyname_name[0], 5992 compnp->cname); 5993 result = mdmn_send_message(sp->setno, 5994 MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, 5995 (char *)send_params, message_size, &resp, 5996 ep); 5997 Free(send_params); 5998 if (resp != NULL) { 5999 if (resp->mmr_exitval >= 0) { 6000 compnp->key = 6001 (mdkey_t)resp->mmr_exitval; 6002 } else { 6003 err = 1; 6004 free_result(resp); 6005 goto out; 6006 } 6007 free_result(resp); 6008 } 6009 if (result != 0) { 6010 err = 1; 6011 goto out; 6012 } 6013 (void) metanamelist_append(&keynlp, compnp); 6014 } else { 6015 if (add_key_name(sp, compnp, &keynlp, 6016 ep) != 0) { 6017 err = 1; 6018 goto out; 6019 } 6020 } 6021 } 6022 6023 /* create the unit structure */ 6024 if ((mp = meta_sp_createunit( 6025 (new_name) ? 
new_np : sp_list->ext_namep, compnp, 6026 sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) { 6027 err = 1; 6028 goto out; 6029 } 6030 6031 if (getenv(META_SP_DEBUG)) { 6032 meta_sp_debug("meta_sp_recover_from_wm: " 6033 "printing newly created unit structure"); 6034 meta_sp_printunit(mp); 6035 } 6036 6037 /* place in unit structure array */ 6038 un_array[i++] = mp; 6039 6040 /* free sp_list */ 6041 meta_sp_list_free(&sp_list); 6042 sp_list = NULL; 6043 numexts = 0; 6044 sp_length = 0LL; 6045 } 6046 6047 /* display configuration updates */ 6048 (void) printf(dgettext(TEXT_DOMAIN, 6049 "The following soft partitions were found and will be added to\n" 6050 "your metadevice configuration.\n")); 6051 (void) printf("%5s %15s %18s\n", 6052 dgettext(TEXT_DOMAIN, "Name"), 6053 dgettext(TEXT_DOMAIN, "Size"), 6054 dgettext(TEXT_DOMAIN, "No. of Extents")); 6055 for (i = 0; i < num_sps; i++) { 6056 (void) printf("%5s%lu %15llu %9d\n", "d", 6057 MD_MIN2UNIT(MD_SID(un_array[i])), 6058 un_array[i]->un_length, un_array[i]->un_numexts); 6059 } 6060 6061 if (!(options & MDCMD_DOIT)) { 6062 not_recovered = 1; 6063 goto out; 6064 } 6065 6066 /* ask user for confirmation */ 6067 (void) printf(dgettext(TEXT_DOMAIN, 6068 "WARNING: You are about to add one or more soft partition\n" 6069 "metadevices to your metadevice configuration. If there\n" 6070 "appears to be an error in the soft partition(s) displayed\n" 6071 "above, do NOT proceed with this recovery operation.\n")); 6072 (void) printf(dgettext(TEXT_DOMAIN, 6073 "Are you sure you want to do this (yes/no)? 
")); 6074 6075 (void) fflush(stdout); 6076 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6077 (strlen(yesno) == 1)) 6078 (void) snprintf(yesno, sizeof (yesno), "%s\n", 6079 dgettext(TEXT_DOMAIN, "no")); 6080 yes = dgettext(TEXT_DOMAIN, "yes"); 6081 if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) { 6082 not_recovered = 1; 6083 goto out; 6084 } 6085 6086 /* commit records one at a time */ 6087 for (i = 0; i < num_sps; i++) { 6088 (void) memset(&set_params, 0, sizeof (set_params)); 6089 set_params.mnum = MD_SID(un_array[i]); 6090 set_params.size = (un_array[i])->c.un_size; 6091 set_params.mdp = (uintptr_t)(un_array[i]); 6092 set_params.options = 6093 meta_check_devicesize(un_array[i]->un_length); 6094 if (set_params.options == MD_CRO_64BIT) { 6095 un_array[i]->c.un_revision = MD_64BIT_META_DEV; 6096 } else { 6097 un_array[i]->c.un_revision = MD_32BIT_META_DEV; 6098 } 6099 MD_SETDRIVERNAME(&set_params, MD_SP, 6100 MD_MIN2SET(set_params.mnum)); 6101 6102 np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep); 6103 6104 /* 6105 * If this is an MN set, send the MD_IOCSET ioctl to all nodes 6106 */ 6107 if (mn_set) { 6108 md_mn_msg_iocset_t send_params; 6109 int result; 6110 md_mn_result_t *resp = NULL; 6111 int mess_size; 6112 6113 /* 6114 * Calculate message size. 
md_mn_msg_iocset_t only 6115 * contains one extent, so increment the size to 6116 * include all extents 6117 */ 6118 mess_size = sizeof (send_params) - 6119 sizeof (mp_ext_t) + 6120 (un_array[i]->un_numexts * sizeof (mp_ext_t)); 6121 6122 send_params.iocset_params = set_params; 6123 (void) memcpy(&send_params.unit, un_array[i], 6124 sizeof (*un_array[i]) - sizeof (mp_ext_t) + 6125 (un_array[i]->un_numexts * sizeof (mp_ext_t))); 6126 result = mdmn_send_message(sp->setno, 6127 MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 6128 (char *)&send_params, mess_size, &resp, 6129 ep); 6130 if (resp != NULL) { 6131 if (resp->mmr_exitval != 0) 6132 err = 1; 6133 free_result(resp); 6134 } 6135 if (result != 0) { 6136 err = 1; 6137 } 6138 } else { 6139 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 6140 np->cname) != 0) { 6141 err = 1; 6142 } 6143 } 6144 6145 if (err == 1) { 6146 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6147 "%s: Error committing record to metadb.\n"), 6148 np->cname); 6149 goto out; 6150 } 6151 6152 /* note that we've committed a record */ 6153 if (!committed) 6154 committed = 1; 6155 6156 /* update any watermarks that need it */ 6157 if (update_list != NULL) { 6158 md_sp_t *msp; 6159 6160 /* 6161 * Check to see if we're trying to create a partition 6162 * on a mirror. If so we may have to enforce an 6163 * ownership change before writing the watermark out. 
6164 */ 6165 if (metaismeta(compnp)) { 6166 char *miscname; 6167 6168 miscname = metagetmiscname(compnp, ep); 6169 if (miscname != NULL) 6170 comp_is_mirror = (strcmp(miscname, 6171 MD_MIRROR) == 0); 6172 else 6173 comp_is_mirror = 0; 6174 } 6175 /* 6176 * If this is a MN set and the component is a mirror, 6177 * change ownership to this node in order to write the 6178 * watermarks 6179 */ 6180 if (mn_set && comp_is_mirror) { 6181 mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); 6182 if (mm == NULL) { 6183 err = 1; 6184 goto out; 6185 } else { 6186 err = meta_mn_change_owner(&ownpar, 6187 sp->setno, 6188 meta_getminor(compnp->dev), 6189 sd->sd_mn_mynode->nd_nodeid, 6190 MD_MN_MM_PREVENT_CHANGE | 6191 MD_MN_MM_SPAWN_THREAD); 6192 if (err != 0) 6193 goto out; 6194 } 6195 } 6196 6197 if ((msp = meta_get_sp(sp, np, ep)) == NULL) { 6198 err = 1; 6199 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6200 "%s: Error updating extent headers.\n"), 6201 np->cname); 6202 goto out; 6203 } 6204 if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) { 6205 err = 1; 6206 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 6207 "%s: Error updating extent headers " 6208 "on disk.\n"), np->cname); 6209 goto out; 6210 } 6211 } 6212 /* 6213 * If we have changed ownership earlier and prevented any 6214 * ownership changes, we can now allow ownership changes 6215 * again. 
6216 */ 6217 if (ownpar) { 6218 (void) meta_mn_change_owner(&ownpar, sp->setno, 6219 ownpar->d.mnum, 6220 ownpar->d.owner, 6221 MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); 6222 } 6223 } 6224 6225 /* update status of all soft partitions to OK */ 6226 minors = Zalloc(num_sps * sizeof (minor_t)); 6227 for (i = 0; i < num_sps; i++) 6228 minors[i] = MD_SID(un_array[i]); 6229 6230 err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep); 6231 if (err != 0) 6232 goto out; 6233 6234 if (options & MDCMD_PRINT) 6235 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6236 "Soft Partitions recovered from device.\n"), 6237 compnp->cname); 6238 out: 6239 /* free memory */ 6240 if (extlist != NULL) 6241 meta_sp_list_free(&extlist); 6242 if (sp_list != NULL) 6243 meta_sp_list_free(&sp_list); 6244 if (update_list != NULL) 6245 meta_sp_list_free(&update_list); 6246 if (un_array != NULL) { 6247 for (i = 0; i < num_sps; i++) 6248 Free(un_array[i]); 6249 Free(un_array); 6250 } 6251 if (minors != NULL) 6252 Free(minors); 6253 if (ownpar != NULL) 6254 Free(ownpar); 6255 (void) fflush(stdout); 6256 6257 if ((keynlp != NULL) && (committed != 1)) { 6258 /* 6259 * if we haven't committed any softparts, either because of an 6260 * error or because the user decided not to proceed, delete 6261 * namelist key for the component 6262 */ 6263 if (mn_set) { 6264 mdnamelist_t *p; 6265 6266 for (p = keynlp; (p != NULL); p = p->next) { 6267 mdname_t *np = p->namep; 6268 md_mn_msg_delkeyname_t send_params; 6269 md_mn_result_t *resp = NULL; 6270 6271 send_params.delkeyname_dev = np->dev; 6272 send_params.delkeyname_setno = sp->setno; 6273 send_params.delkeyname_key = np->key; 6274 (void) mdmn_send_message(sp->setno, 6275 MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, 6276 (char *)&send_params, sizeof (send_params), 6277 &resp, ep); 6278 if (resp != NULL) { 6279 free_result(resp); 6280 } 6281 } 6282 } else { 6283 (void) del_key_names(sp, keynlp, NULL); 6284 } 6285 } 6286 6287 metafreenamelist(keynlp); 
6288 6289 if (err) 6290 return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname)); 6291 6292 if (not_recovered) 6293 if (options & MDCMD_PRINT) 6294 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6295 "Soft Partitions NOT recovered from device.\n"), 6296 compnp->cname); 6297 return (0); 6298 } 6299 6300 /* 6301 * FUNCTION: meta_sp_recover_from_unit() 6302 * INPUT: sp - name of set we are recovering in 6303 * compnp - name of component we are recovering from 6304 * options - metarecover options 6305 * OUTPUT: ep - return error pointer 6306 * RETURNS: int - 0 - success, -1 - error 6307 * PURPOSE: update watermarks to match metadb records. begin by getting 6308 * a namelist representing all soft partitions on the specified 6309 * component. then, build an extlist representing the soft 6310 * partitions, filling in the freespace extents. notify user 6311 * of changes, place all soft partitions into the "recovering" 6312 * state and update the watermarks. finally, return all soft 6313 * partitions to the "OK" state. 
6314 */ 6315 static int 6316 meta_sp_recover_from_unit( 6317 mdsetname_t *sp, 6318 mdname_t *compnp, 6319 mdcmdopts_t options, 6320 md_error_t *ep 6321 ) 6322 { 6323 mdnamelist_t *spnlp = NULL; 6324 mdnamelist_t *nlp = NULL; 6325 sp_ext_node_t *ext = NULL; 6326 sp_ext_node_t *extlist = NULL; 6327 int count; 6328 char yesno[255]; 6329 char *yes; 6330 int rval = 0; 6331 minor_t *minors = NULL; 6332 int i; 6333 md_sp_t *msp; 6334 md_set_desc *sd; 6335 bool_t mn_set = 0; 6336 daddr_t start_block; 6337 6338 count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep); 6339 if (count <= 0) 6340 return (-1); 6341 6342 /* set flag if dealing with a MN set */ 6343 if (!metaislocalset(sp)) { 6344 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 6345 return (-1); 6346 } 6347 if (MD_MNSET_DESC(sd)) 6348 mn_set = 1; 6349 } 6350 /* 6351 * Save the XDR unit structure for one of the soft partitions; 6352 * we'll use this later to provide metadevice context to 6353 * update the watermarks so the device can be resolved by 6354 * devid instead of dev_t. 
6355 */ 6356 if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) { 6357 metafreenamelist(spnlp); 6358 return (-1); 6359 } 6360 6361 if ((start_block = meta_sp_get_start(sp, compnp, ep)) == 6362 MD_DISKADDR_ERROR) { 6363 return (-1); 6364 } 6365 6366 meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block, 6367 EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset); 6368 meta_sp_list_insert(NULL, NULL, &extlist, 6369 metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE, 6370 EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset); 6371 6372 if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) { 6373 metafreenamelist(spnlp); 6374 return (-1); 6375 } 6376 6377 assert(extlist != NULL); 6378 if ((options & MDCMD_VERBOSE) != 0) { 6379 (void) printf(dgettext(TEXT_DOMAIN, 6380 "Updating extent headers on device %s from metadb.\n\n"), 6381 compnp->cname); 6382 (void) printf(dgettext(TEXT_DOMAIN, 6383 "The following extent headers will be written:\n")); 6384 meta_sp_display_exthdr(); 6385 } 6386 6387 meta_sp_list_freefill(&extlist, metagetsize(compnp, ep)); 6388 6389 for (ext = extlist; ext != NULL; ext = ext->ext_next) { 6390 6391 /* mark every node for updating except the reserved space */ 6392 if (ext->ext_type != EXTTYP_RESERVED) { 6393 ext->ext_flags |= EXTFLG_UPDATE; 6394 6395 /* print extent information */ 6396 if ((options & MDCMD_VERBOSE) != 0) 6397 meta_sp_display_ext(ext); 6398 } 6399 } 6400 6401 /* request verification and then update all watermarks */ 6402 if ((options & MDCMD_DOIT) != 0) { 6403 6404 (void) printf(dgettext(TEXT_DOMAIN, 6405 "\nWARNING: You are about to overwrite portions of %s\n" 6406 "with soft partition metadata. The extent headers will be\n" 6407 "written to match the existing metadb configuration. If\n" 6408 "the device was not previously setup with this\n" 6409 "configuration, data loss may result.\n\n"), 6410 compnp->cname); 6411 (void) printf(dgettext(TEXT_DOMAIN, 6412 "Are you sure you want to do this (yes/no)? 
")); 6413 6414 (void) fflush(stdout); 6415 if ((fgets(yesno, sizeof (yesno), stdin) == NULL) || 6416 (strlen(yesno) == 1)) 6417 (void) snprintf(yesno, sizeof (yesno), 6418 "%s\n", dgettext(TEXT_DOMAIN, "no")); 6419 yes = dgettext(TEXT_DOMAIN, "yes"); 6420 if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) { 6421 /* place soft partitions into recovering state */ 6422 minors = Zalloc(count * sizeof (minor_t)); 6423 for (nlp = spnlp, i = 0; 6424 nlp != NULL && i < count; 6425 nlp = nlp->next, i++) { 6426 assert(nlp->namep != NULL); 6427 minors[i] = meta_getminor(nlp->namep->dev); 6428 } 6429 if (update_sp_status(sp, minors, count, 6430 MD_SP_RECOVER, mn_set, ep) != 0) { 6431 rval = -1; 6432 goto out; 6433 } 6434 6435 /* update the watermarks */ 6436 if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { 6437 rval = -1; 6438 goto out; 6439 } 6440 6441 if (options & MDCMD_PRINT) { 6442 (void) printf(dgettext(TEXT_DOMAIN, "%s: " 6443 "Soft Partitions recovered from metadb\n"), 6444 compnp->cname); 6445 } 6446 6447 /* return soft partitions to the OK state */ 6448 if (update_sp_status(sp, minors, count, 6449 MD_SP_OK, mn_set, ep) != 0) { 6450 rval = -1; 6451 goto out; 6452 } 6453 6454 rval = 0; 6455 goto out; 6456 } 6457 } 6458 6459 if (options & MDCMD_PRINT) { 6460 (void) printf(dgettext(TEXT_DOMAIN, 6461 "%s: Soft Partitions NOT recovered from metadb\n"), 6462 compnp->cname); 6463 } 6464 6465 out: 6466 if (minors != NULL) 6467 Free(minors); 6468 metafreenamelist(spnlp); 6469 meta_sp_list_free(&extlist); 6470 (void) fflush(stdout); 6471 return (rval); 6472 } 6473 6474 6475 /* 6476 * FUNCTION: meta_sp_update_abr() 6477 * INPUT: sp - name of set we are recovering in 6478 * OUTPUT: ep - return error pointer 6479 * RETURNS: int - 0 - success, -1 - error 6480 * PURPOSE: update the ABR state for all soft partitions in the set. This 6481 * is called when joining a set. 
It sends a message to the master 6482 * node for each soft partition to get the value of tstate and 6483 * then sets ABR ,if required, by opening the sp, setting ABR 6484 * and then closing the sp. This approach is taken rather that 6485 * just issuing the MD_MN_SET_CAP ioctl, in order to deal with 6486 * the case when we have another node simultaneously unsetting ABR. 6487 */ 6488 int 6489 meta_sp_update_abr( 6490 mdsetname_t *sp, 6491 md_error_t *ep 6492 ) 6493 { 6494 mdnamelist_t *devnlp = NULL; 6495 mdnamelist_t *p; 6496 mdname_t *devnp = NULL; 6497 md_unit_t *un; 6498 char fname[MAXPATHLEN]; 6499 int mnum, fd; 6500 volcap_t vc; 6501 uint_t tstate; 6502 6503 6504 if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { 6505 return (-1); 6506 } 6507 6508 /* Exit if no soft partitions in this set */ 6509 if (devnlp == NULL) 6510 return (0); 6511 6512 /* For each soft partition */ 6513 for (p = devnlp; (p != NULL); p = p->next) { 6514 devnp = p->namep; 6515 6516 /* check if this is a top level metadevice */ 6517 if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL) 6518 goto out; 6519 if (MD_HAS_PARENT(MD_PARENT(un))) { 6520 Free(un); 6521 continue; 6522 } 6523 Free(un); 6524 6525 /* Get tstate from Master */ 6526 if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) { 6527 mdname_t *np; 6528 np = metamnumname(&sp, meta_getminor(devnp->dev), 0, 6529 ep); 6530 if (np) { 6531 md_perror(dgettext(TEXT_DOMAIN, 6532 "Unable to get tstate for %s"), np->cname); 6533 } 6534 continue; 6535 } 6536 /* If not set on the master, nothing to do */ 6537 if (!(tstate & MD_ABR_CAP)) 6538 continue; 6539 6540 mnum = meta_getminor(devnp->dev); 6541 (void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u", 6542 sp->setname, (unsigned)MD_MIN2UNIT(mnum)); 6543 if ((fd = open(fname, O_RDWR, 0)) < 0) { 6544 md_perror(dgettext(TEXT_DOMAIN, 6545 "Could not open device %s"), fname); 6546 continue; 6547 } 6548 6549 /* Set ABR state */ 6550 vc.vc_info = 0; 6551 vc.vc_set = 0; 6552 if (ioctl(fd, 
DKIOCGETVOLCAP, &vc) < 0) { 6553 (void) close(fd); 6554 continue; 6555 } 6556 6557 vc.vc_set = DKV_ABR_CAP; 6558 if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { 6559 (void) close(fd); 6560 goto out; 6561 } 6562 6563 (void) close(fd); 6564 } 6565 metafreenamelist(devnlp); 6566 return (0); 6567 out: 6568 metafreenamelist(devnlp); 6569 return (-1); 6570 } 6571 6572 /* 6573 * FUNCTION: meta_mn_sp_update_abr() 6574 * INPUT: arg - Given set. 6575 * PURPOSE: update the ABR state for all soft partitions in the set by 6576 * forking a process to call meta_sp_update_abr() 6577 * This function is only called via rpc.metad when adding a node 6578 * to a set, ie this node is beong joined to the set by another 6579 * node. 6580 */ 6581 void * 6582 meta_mn_sp_update_abr(void *arg) 6583 { 6584 set_t setno = *((set_t *)arg); 6585 mdsetname_t *sp; 6586 md_error_t mde = mdnullerror; 6587 int fval; 6588 6589 /* should have a set */ 6590 assert(setno != NULL); 6591 6592 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6593 mde_perror(&mde, ""); 6594 return (NULL); 6595 } 6596 6597 if (!(meta_is_mn_set(sp, &mde))) { 6598 mde_perror(&mde, ""); 6599 return (NULL); 6600 } 6601 6602 /* fork a process */ 6603 if ((fval = md_daemonize(sp, &mde)) != 0) { 6604 /* 6605 * md_daemonize will fork off a process. The is the 6606 * parent or error. 6607 */ 6608 if (fval > 0) { 6609 return (NULL); 6610 } 6611 mde_perror(&mde, ""); 6612 return (NULL); 6613 } 6614 /* 6615 * Child process should never return back to rpc.metad, but 6616 * should exit. 6617 * Flush all internally cached data inherited from parent process 6618 * since cached data will be cleared when parent process RPC request 6619 * has completed (which is possibly before this child process 6620 * can complete). 6621 * Child process can retrieve and cache its own copy of data from 6622 * rpc.metad that won't be changed by the parent process. 
6623 * 6624 * Reset md_in_daemon since this child will be a client of rpc.metad 6625 * not part of the rpc.metad daemon itself. 6626 * md_in_daemon is used by rpc.metad so that libmeta can tell if 6627 * this thread is rpc.metad or any other thread. (If this thread 6628 * was rpc.metad it could use some short circuit code to get data 6629 * directly from rpc.metad instead of doing an RPC call to rpc.metad). 6630 */ 6631 md_in_daemon = 0; 6632 metaflushsetname(sp); 6633 sr_cache_flush_setno(setno); 6634 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 6635 mde_perror(&mde, ""); 6636 md_exit(sp, 1); 6637 } 6638 6639 6640 /* 6641 * Closing stdin/out/err here. 6642 */ 6643 (void) close(0); 6644 (void) close(1); 6645 (void) close(2); 6646 assert(fval == 0); 6647 6648 (void) meta_sp_update_abr(sp, &mde); 6649 6650 md_exit(sp, 0); 6651 /*NOTREACHED*/ 6652 return (NULL); 6653 } 6654