1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012 Alexander Motin <mav@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/bio.h> 34 #include <sys/endian.h> 35 #include <sys/kernel.h> 36 #include <sys/kobj.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/systm.h> 42 #include <sys/time.h> 43 #include <sys/clock.h> 44 #include <geom/geom.h> 45 #include "geom/raid/g_raid.h" 46 #include "geom/raid/md_ddf.h" 47 #include "g_raid_md_if.h" 48 49 static MALLOC_DEFINE(M_MD_DDF, "md_ddf_data", "GEOM_RAID DDF metadata"); 50 51 #define DDF_MAX_DISKS_HARD 128 52 53 #define DDF_MAX_DISKS 16 54 #define DDF_MAX_VDISKS 7 55 #define DDF_MAX_PARTITIONS 1 56 57 #define DECADE (3600*24*(365*10+2)) /* 10 years in seconds. */ 58 59 struct ddf_meta { 60 u_int sectorsize; 61 u_int bigendian; 62 struct ddf_header *hdr; 63 struct ddf_cd_record *cdr; 64 struct ddf_pd_record *pdr; 65 struct ddf_vd_record *vdr; 66 void *cr; 67 struct ddf_pdd_record *pdd; 68 struct ddf_bbm_log *bbm; 69 }; 70 71 struct ddf_vol_meta { 72 u_int sectorsize; 73 u_int bigendian; 74 struct ddf_header *hdr; 75 struct ddf_cd_record *cdr; 76 struct ddf_vd_entry *vde; 77 struct ddf_vdc_record *vdc; 78 struct ddf_vdc_record *bvdc[DDF_MAX_DISKS_HARD]; 79 }; 80 81 struct g_raid_md_ddf_perdisk { 82 struct ddf_meta pd_meta; 83 }; 84 85 struct g_raid_md_ddf_pervolume { 86 struct ddf_vol_meta pv_meta; 87 int pv_started; 88 struct callout pv_start_co; /* STARTING state timer. */ 89 }; 90 91 struct g_raid_md_ddf_object { 92 struct g_raid_md_object mdio_base; 93 u_int mdio_bigendian; 94 struct ddf_meta mdio_meta; 95 int mdio_starting; 96 struct callout mdio_start_co; /* STARTING state timer. */ 97 int mdio_started; 98 struct root_hold_token *mdio_rootmount; /* Root mount delay token. */ 99 }; 100 101 static g_raid_md_create_req_t g_raid_md_create_req_ddf; 102 static g_raid_md_taste_t g_raid_md_taste_ddf; 103 static g_raid_md_event_t g_raid_md_event_ddf; 104 static g_raid_md_volume_event_t g_raid_md_volume_event_ddf; 105 static g_raid_md_ctl_t g_raid_md_ctl_ddf; 106 static g_raid_md_write_t g_raid_md_write_ddf; 107 static g_raid_md_fail_disk_t g_raid_md_fail_disk_ddf; 108 static g_raid_md_free_disk_t g_raid_md_free_disk_ddf; 109 static g_raid_md_free_volume_t g_raid_md_free_volume_ddf; 110 static g_raid_md_free_t g_raid_md_free_ddf; 111 112 static kobj_method_t g_raid_md_ddf_methods[] = { 113 KOBJMETHOD(g_raid_md_create_req, g_raid_md_create_req_ddf), 114 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_ddf), 115 KOBJMETHOD(g_raid_md_event, g_raid_md_event_ddf), 116 KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_ddf), 117 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_ddf), 118 KOBJMETHOD(g_raid_md_write, g_raid_md_write_ddf), 119 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_ddf), 120 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_ddf), 121 KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_ddf), 122 KOBJMETHOD(g_raid_md_free, g_raid_md_free_ddf), 123 { 0, 0 } 124 }; 125 126 static struct g_raid_md_class g_raid_md_ddf_class = { 127 "DDF", 128 g_raid_md_ddf_methods, 129 sizeof(struct g_raid_md_ddf_object), 130 .mdc_enable = 1, 131 .mdc_priority = 100 132 }; 133 134 #define GET8(m, f) ((m)->f) 135 #define GET16(m, f) ((m)->bigendian ? be16dec(&(m)->f) : le16dec(&(m)->f)) 136 #define GET32(m, f) ((m)->bigendian ? be32dec(&(m)->f) : le32dec(&(m)->f)) 137 #define GET64(m, f) ((m)->bigendian ? be64dec(&(m)->f) : le64dec(&(m)->f)) 138 #define GET8D(m, f) (f) 139 #define GET16D(m, f) ((m)->bigendian ? be16dec(&f) : le16dec(&f)) 140 #define GET32D(m, f) ((m)->bigendian ? be32dec(&f) : le32dec(&f)) 141 #define GET64D(m, f) ((m)->bigendian ? be64dec(&f) : le64dec(&f)) 142 #define GET8P(m, f) (*(f)) 143 #define GET16P(m, f) ((m)->bigendian ? be16dec(f) : le16dec(f)) 144 #define GET32P(m, f) ((m)->bigendian ? be32dec(f) : le32dec(f)) 145 #define GET64P(m, f) ((m)->bigendian ? be64dec(f) : le64dec(f)) 146 147 #define SET8P(m, f, v) \ 148 (*(f) = (v)) 149 #define SET16P(m, f, v) \ 150 do { \ 151 if ((m)->bigendian) \ 152 be16enc((f), (v)); \ 153 else \ 154 le16enc((f), (v)); \ 155 } while (0) 156 #define SET32P(m, f, v) \ 157 do { \ 158 if ((m)->bigendian) \ 159 be32enc((f), (v)); \ 160 else \ 161 le32enc((f), (v)); \ 162 } while (0) 163 #define SET64P(m, f, v) \ 164 do { \ 165 if ((m)->bigendian) \ 166 be64enc((f), (v)); \ 167 else \ 168 le64enc((f), (v)); \ 169 } while (0) 170 #define SET8(m, f, v) SET8P((m), &((m)->f), (v)) 171 #define SET16(m, f, v) SET16P((m), &((m)->f), (v)) 172 #define SET32(m, f, v) SET32P((m), &((m)->f), (v)) 173 #define SET64(m, f, v) SET64P((m), &((m)->f), (v)) 174 #define SET8D(m, f, v) SET8P((m), &(f), (v)) 175 #define SET16D(m, f, v) SET16P((m), &(f), (v)) 176 #define SET32D(m, f, v) SET32P((m), &(f), (v)) 177 #define SET64D(m, f, v) SET64P((m), &(f), (v)) 178 179 #define GETCRNUM(m) (GET32((m), hdr->cr_length) / \ 180 GET16((m), hdr->Configuration_Record_Length)) 181 182 #define GETVDCPTR(m, n) ((struct ddf_vdc_record *)((uint8_t *)(m)->cr + \ 183 (n) * GET16((m), hdr->Configuration_Record_Length) * \ 184 (m)->sectorsize)) 185 186 #define GETSAPTR(m, n) ((struct ddf_sa_record *)((uint8_t *)(m)->cr + \ 187 (n) * GET16((m), hdr->Configuration_Record_Length) * \ 188 (m)->sectorsize)) 189 190 static int 191 isff(uint8_t *buf, int size) 192 { 193 int i; 194 195 for (i = 0; i < size; i++) 196 if (buf[i] != 0xff) 197 return (0); 198 return (1); 199 } 200 201 static void 202 print_guid(uint8_t *buf) 203 { 204 int i, ascii; 205 206 ascii = 1; 207 for (i = 0; i < 24; i++) { 208 if (buf[i] != 0 && (buf[i] < ' ' || buf[i] > 127)) { 209 ascii = 0; 210 break; 211 } 212 } 213 if (ascii) { 214 printf("'%.24s'", buf); 215 } else { 216 for (i = 0; i < 24; i++) 217 printf("%02x", buf[i]); 218 } 219 } 220 221 static void 222 g_raid_md_ddf_print(struct ddf_meta *meta) 223 { 224 struct ddf_vdc_record *vdc; 225 struct ddf_vuc_record *vuc; 226 struct ddf_sa_record *sa; 227 uint64_t *val2; 228 uint32_t val; 229 int i, j, k, num, num2; 230 231 if (g_raid_debug < 1) 232 return; 233 234 printf("********* DDF Metadata *********\n"); 235 printf("**** Header ****\n"); 236 printf("DDF_Header_GUID "); 237 print_guid(meta->hdr->DDF_Header_GUID); 238 printf("\n"); 239 printf("DDF_rev %8.8s\n", (char *)&meta->hdr->DDF_rev[0]); 240 printf("Sequence_Number 0x%08x\n", GET32(meta, hdr->Sequence_Number)); 241 printf("TimeStamp 0x%08x\n", GET32(meta, hdr->TimeStamp)); 242 printf("Open_Flag 0x%02x\n", GET16(meta, hdr->Open_Flag)); 243 printf("Foreign_Flag 0x%02x\n", GET16(meta, hdr->Foreign_Flag)); 244 printf("Diskgrouping 0x%02x\n", GET16(meta, hdr->Diskgrouping)); 245 printf("Primary_Header_LBA %ju\n", GET64(meta, hdr->Primary_Header_LBA)); 246 printf("Secondary_Header_LBA %ju\n", GET64(meta, hdr->Secondary_Header_LBA)); 247 printf("WorkSpace_Length %u\n", GET32(meta, hdr->WorkSpace_Length)); 248 printf("WorkSpace_LBA %ju\n", GET64(meta, hdr->WorkSpace_LBA)); 249 printf("Max_PD_Entries %u\n", GET16(meta, hdr->Max_PD_Entries)); 250 printf("Max_VD_Entries %u\n", GET16(meta, hdr->Max_VD_Entries)); 251 printf("Max_Partitions %u\n", GET16(meta, hdr->Max_Partitions)); 252 printf("Configuration_Record_Length %u\n", GET16(meta, hdr->Configuration_Record_Length)); 253 printf("Max_Primary_Element_Entries %u\n", GET16(meta, hdr->Max_Primary_Element_Entries)); 254 printf("Controller Data %u:%u\n", GET32(meta, hdr->cd_section), GET32(meta, hdr->cd_length)); 255 printf("Physical Disk %u:%u\n", GET32(meta, hdr->pdr_section), GET32(meta, hdr->pdr_length)); 256 printf("Virtual Disk %u:%u\n", GET32(meta, hdr->vdr_section), GET32(meta, hdr->vdr_length)); 257 printf("Configuration Recs %u:%u\n", GET32(meta, hdr->cr_section), GET32(meta, hdr->cr_length)); 258 printf("Physical Disk Recs %u:%u\n", GET32(meta, hdr->pdd_section), GET32(meta, hdr->pdd_length)); 259 printf("BBM Log %u:%u\n", GET32(meta, hdr->bbmlog_section), GET32(meta, hdr->bbmlog_length)); 260 printf("Diagnostic Space %u:%u\n", GET32(meta, hdr->Diagnostic_Space), GET32(meta, hdr->Diagnostic_Space_Length)); 261 printf("Vendor_Specific_Logs %u:%u\n", GET32(meta, hdr->Vendor_Specific_Logs), GET32(meta, hdr->Vendor_Specific_Logs_Length)); 262 printf("**** Controller Data ****\n"); 263 printf("Controller_GUID "); 264 print_guid(meta->cdr->Controller_GUID); 265 printf("\n"); 266 printf("Controller_Type 0x%04x%04x 0x%04x%04x\n", 267 GET16(meta, cdr->Controller_Type.Vendor_ID), 268 GET16(meta, cdr->Controller_Type.Device_ID), 269 GET16(meta, cdr->Controller_Type.SubVendor_ID), 270 GET16(meta, cdr->Controller_Type.SubDevice_ID)); 271 printf("Product_ID '%.16s'\n", (char *)&meta->cdr->Product_ID[0]); 272 printf("**** Physical Disk Records ****\n"); 273 printf("Populated_PDEs %u\n", GET16(meta, pdr->Populated_PDEs)); 274 printf("Max_PDE_Supported %u\n", GET16(meta, pdr->Max_PDE_Supported)); 275 for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) { 276 if (isff(meta->pdr->entry[j].PD_GUID, 24)) 277 continue; 278 if (GET32(meta, pdr->entry[j].PD_Reference) == 0xffffffff) 279 continue; 280 printf("PD_GUID "); 281 print_guid(meta->pdr->entry[j].PD_GUID); 282 printf("\n"); 283 printf("PD_Reference 0x%08x\n", 284 GET32(meta, pdr->entry[j].PD_Reference)); 285 printf("PD_Type 0x%04x\n", 286 GET16(meta, pdr->entry[j].PD_Type)); 287 printf("PD_State 0x%04x\n", 288 GET16(meta, pdr->entry[j].PD_State)); 289 printf("Configured_Size %ju\n", 290 GET64(meta, pdr->entry[j].Configured_Size)); 291 printf("Block_Size %u\n", 292 GET16(meta, pdr->entry[j].Block_Size)); 293 } 294 printf("**** Virtual Disk Records ****\n"); 295 printf("Populated_VDEs %u\n", GET16(meta, vdr->Populated_VDEs)); 296 printf("Max_VDE_Supported %u\n", GET16(meta, vdr->Max_VDE_Supported)); 297 for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) { 298 if (isff(meta->vdr->entry[j].VD_GUID, 24)) 299 continue; 300 printf("VD_GUID "); 301 print_guid(meta->vdr->entry[j].VD_GUID); 302 printf("\n"); 303 printf("VD_Number 0x%04x\n", 304 GET16(meta, vdr->entry[j].VD_Number)); 305 printf("VD_Type 0x%04x\n", 306 GET16(meta, vdr->entry[j].VD_Type)); 307 printf("VD_State 0x%02x\n", 308 GET8(meta, vdr->entry[j].VD_State)); 309 printf("Init_State 0x%02x\n", 310 GET8(meta, vdr->entry[j].Init_State)); 311 printf("Drive_Failures_Remaining %u\n", 312 GET8(meta, vdr->entry[j].Drive_Failures_Remaining)); 313 printf("VD_Name '%.16s'\n", 314 (char *)&meta->vdr->entry[j].VD_Name); 315 } 316 printf("**** Configuration Records ****\n"); 317 num = GETCRNUM(meta); 318 for (j = 0; j < num; j++) { 319 vdc = GETVDCPTR(meta, j); 320 val = GET32D(meta, vdc->Signature); 321 switch (val) { 322 case DDF_VDCR_SIGNATURE: 323 printf("** Virtual Disk Configuration **\n"); 324 printf("VD_GUID "); 325 print_guid(vdc->VD_GUID); 326 printf("\n"); 327 printf("Timestamp 0x%08x\n", 328 GET32D(meta, vdc->Timestamp)); 329 printf("Sequence_Number 0x%08x\n", 330 GET32D(meta, vdc->Sequence_Number)); 331 printf("Primary_Element_Count %u\n", 332 GET16D(meta, vdc->Primary_Element_Count)); 333 printf("Stripe_Size %u\n", 334 GET8D(meta, vdc->Stripe_Size)); 335 printf("Primary_RAID_Level 0x%02x\n", 336 GET8D(meta, vdc->Primary_RAID_Level)); 337 printf("RLQ 0x%02x\n", 338 GET8D(meta, vdc->RLQ)); 339 printf("Secondary_Element_Count %u\n", 340 GET8D(meta, vdc->Secondary_Element_Count)); 341 printf("Secondary_Element_Seq %u\n", 342 GET8D(meta, vdc->Secondary_Element_Seq)); 343 printf("Secondary_RAID_Level 0x%02x\n", 344 GET8D(meta, vdc->Secondary_RAID_Level)); 345 printf("Block_Count %ju\n", 346 GET64D(meta, vdc->Block_Count)); 347 printf("VD_Size %ju\n", 348 GET64D(meta, vdc->VD_Size)); 349 printf("Block_Size %u\n", 350 GET16D(meta, vdc->Block_Size)); 351 printf("Rotate_Parity_count %u\n", 352 GET8D(meta, vdc->Rotate_Parity_count)); 353 printf("Associated_Spare_Disks"); 354 for (i = 0; i < 8; i++) { 355 if (GET32D(meta, vdc->Associated_Spares[i]) != 0xffffffff) 356 printf(" 0x%08x", GET32D(meta, vdc->Associated_Spares[i])); 357 } 358 printf("\n"); 359 printf("Cache_Flags %016jx\n", 360 GET64D(meta, vdc->Cache_Flags)); 361 printf("BG_Rate %u\n", 362 GET8D(meta, vdc->BG_Rate)); 363 printf("MDF_Parity_Disks %u\n", 364 GET8D(meta, vdc->MDF_Parity_Disks)); 365 printf("MDF_Parity_Generator_Polynomial 0x%04x\n", 366 GET16D(meta, vdc->MDF_Parity_Generator_Polynomial)); 367 printf("MDF_Constant_Generation_Method 0x%02x\n", 368 GET8D(meta, vdc->MDF_Constant_Generation_Method)); 369 printf("Physical_Disks "); 370 num2 = GET16D(meta, vdc->Primary_Element_Count); 371 val2 = (uint64_t *)&(vdc->Physical_Disk_Sequence[GET16(meta, hdr->Max_Primary_Element_Entries)]); 372 for (i = 0; i < num2; i++) 373 printf(" 0x%08x @ %ju", 374 GET32D(meta, vdc->Physical_Disk_Sequence[i]), 375 GET64P(meta, val2 + i)); 376 printf("\n"); 377 break; 378 case DDF_VUCR_SIGNATURE: 379 printf("** Vendor Unique Configuration **\n"); 380 vuc = (struct ddf_vuc_record *)vdc; 381 printf("VD_GUID "); 382 print_guid(vuc->VD_GUID); 383 printf("\n"); 384 break; 385 case DDF_SA_SIGNATURE: 386 printf("** Spare Assignment Configuration **\n"); 387 sa = (struct ddf_sa_record *)vdc; 388 printf("Timestamp 0x%08x\n", 389 GET32D(meta, sa->Timestamp)); 390 printf("Spare_Type 0x%02x\n", 391 GET8D(meta, sa->Spare_Type)); 392 printf("Populated_SAEs %u\n", 393 GET16D(meta, sa->Populated_SAEs)); 394 printf("MAX_SAE_Supported %u\n", 395 GET16D(meta, sa->MAX_SAE_Supported)); 396 for (i = 0; i < GET16D(meta, sa->Populated_SAEs); i++) { 397 if (isff(sa->entry[i].VD_GUID, 24)) 398 continue; 399 printf("VD_GUID "); 400 for (k = 0; k < 24; k++) 401 printf("%02x", sa->entry[i].VD_GUID[k]); 402 printf("\n"); 403 printf("Secondary_Element %u\n", 404 GET16D(meta, sa->entry[i].Secondary_Element)); 405 } 406 break; 407 case 0x00000000: 408 case 0xFFFFFFFF: 409 break; 410 default: 411 printf("Unknown configuration signature %08x\n", val); 412 break; 413 } 414 } 415 printf("**** Physical Disk Data ****\n"); 416 printf("PD_GUID "); 417 print_guid(meta->pdd->PD_GUID); 418 printf("\n"); 419 printf("PD_Reference 0x%08x\n", 420 GET32(meta, pdd->PD_Reference)); 421 printf("Forced_Ref_Flag 0x%02x\n", 422 GET8(meta, pdd->Forced_Ref_Flag)); 423 printf("Forced_PD_GUID_Flag 0x%02x\n", 424 GET8(meta, pdd->Forced_PD_GUID_Flag)); 425 } 426 427 static int 428 ddf_meta_find_pd(struct ddf_meta *meta, uint8_t *GUID, uint32_t PD_Reference) 429 { 430 int i; 431 432 for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) { 433 if (GUID != NULL) { 434 if (memcmp(meta->pdr->entry[i].PD_GUID, GUID, 24) == 0) 435 return (i); 436 } else if (PD_Reference != 0xffffffff) { 437 if (GET32(meta, pdr->entry[i].PD_Reference) == PD_Reference) 438 return (i); 439 } else 440 if (isff(meta->pdr->entry[i].PD_GUID, 24)) 441 return (i); 442 } 443 if (GUID == NULL && PD_Reference == 0xffffffff) { 444 if (i >= GET16(meta, pdr->Max_PDE_Supported)) 445 return (-1); 446 SET16(meta, pdr->Populated_PDEs, i + 1); 447 return (i); 448 } 449 return (-1); 450 } 451 452 static int 453 ddf_meta_find_vd(struct ddf_meta *meta, uint8_t *GUID) 454 { 455 int i; 456 457 for (i = 0; i < GET16(meta, vdr->Populated_VDEs); i++) { 458 if (GUID != NULL) { 459 if (memcmp(meta->vdr->entry[i].VD_GUID, GUID, 24) == 0) 460 return (i); 461 } else 462 if (isff(meta->vdr->entry[i].VD_GUID, 24)) 463 return (i); 464 } 465 if (GUID == NULL) { 466 if (i >= GET16(meta, vdr->Max_VDE_Supported)) 467 return (-1); 468 SET16(meta, vdr->Populated_VDEs, i + 1); 469 return (i); 470 } 471 return (-1); 472 } 473 474 static struct ddf_vdc_record * 475 ddf_meta_find_vdc(struct ddf_meta *meta, uint8_t *GUID) 476 { 477 struct ddf_vdc_record *vdc; 478 int i, num; 479 480 num = GETCRNUM(meta); 481 for (i = 0; i < num; i++) { 482 vdc = GETVDCPTR(meta, i); 483 if (GUID != NULL) { 484 if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE && 485 memcmp(vdc->VD_GUID, GUID, 24) == 0) 486 return (vdc); 487 } else 488 if (GET32D(meta, vdc->Signature) == 0xffffffff || 489 GET32D(meta, vdc->Signature) == 0) 490 return (vdc); 491 } 492 return (NULL); 493 } 494 495 static int 496 ddf_meta_count_vdc(struct ddf_meta *meta, uint8_t *GUID) 497 { 498 struct ddf_vdc_record *vdc; 499 int i, num, cnt; 500 501 cnt = 0; 502 num = GETCRNUM(meta); 503 for (i = 0; i < num; i++) { 504 vdc = GETVDCPTR(meta, i); 505 if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE) 506 continue; 507 if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0) 508 cnt++; 509 } 510 return (cnt); 511 } 512 513 static int 514 ddf_meta_find_disk(struct ddf_vol_meta *vmeta, uint32_t PD_Reference, 515 int *bvdp, int *posp) 516 { 517 int i, bvd, pos; 518 519 i = 0; 520 for (bvd = 0; bvd < GET8(vmeta, vdc->Secondary_Element_Count); bvd++) { 521 if (vmeta->bvdc[bvd] == NULL) { 522 i += GET16(vmeta, vdc->Primary_Element_Count); // XXX 523 continue; 524 } 525 for (pos = 0; pos < GET16(vmeta, bvdc[bvd]->Primary_Element_Count); 526 pos++, i++) { 527 if (GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]) == 528 PD_Reference) { 529 if (bvdp != NULL) 530 *bvdp = bvd; 531 if (posp != NULL) 532 *posp = pos; 533 return (i); 534 } 535 } 536 } 537 return (-1); 538 } 539 540 static struct ddf_sa_record * 541 ddf_meta_find_sa(struct ddf_meta *meta, int create) 542 { 543 struct ddf_sa_record *sa; 544 int i, num; 545 546 num = GETCRNUM(meta); 547 for (i = 0; i < num; i++) { 548 sa = GETSAPTR(meta, i); 549 if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE) 550 return (sa); 551 } 552 if (create) { 553 for (i = 0; i < num; i++) { 554 sa = GETSAPTR(meta, i); 555 if (GET32D(meta, sa->Signature) == 0xffffffff || 556 GET32D(meta, sa->Signature) == 0) 557 return (sa); 558 } 559 } 560 return (NULL); 561 } 562 563 static void 564 ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample) 565 { 566 struct timespec ts; 567 struct clocktime ct; 568 struct g_raid_md_ddf_perdisk *pd; 569 struct g_raid_md_ddf_object *mdi; 570 struct ddf_meta *meta; 571 struct ddf_pd_entry *pde; 572 off_t anchorlba; 573 u_int ss, pos, size; 574 int len, error; 575 char serial_buffer[24]; 576 577 if (sample->hdr == NULL) 578 sample = NULL; 579 580 mdi = (struct g_raid_md_ddf_object *)disk->d_softc->sc_md; 581 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 582 meta = &pd->pd_meta; 583 ss = disk->d_consumer->provider->sectorsize; 584 anchorlba = disk->d_consumer->provider->mediasize / ss - 1; 585 586 meta->sectorsize = ss; 587 meta->bigendian = sample ? sample->bigendian : mdi->mdio_bigendian; 588 getnanotime(&ts); 589 clock_ts_to_ct(&ts, &ct); 590 591 /* Header */ 592 meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 593 memset(meta->hdr, 0xff, ss); 594 if (sample) { 595 memcpy(meta->hdr, sample->hdr, sizeof(struct ddf_header)); 596 if (ss != sample->sectorsize) { 597 SET32(meta, hdr->WorkSpace_Length, 598 howmany(GET32(sample, hdr->WorkSpace_Length) * 599 sample->sectorsize, ss)); 600 SET16(meta, hdr->Configuration_Record_Length, 601 howmany(GET16(sample, 602 hdr->Configuration_Record_Length) * 603 sample->sectorsize, ss)); 604 SET32(meta, hdr->cd_length, 605 howmany(GET32(sample, hdr->cd_length) * 606 sample->sectorsize, ss)); 607 SET32(meta, hdr->pdr_length, 608 howmany(GET32(sample, hdr->pdr_length) * 609 sample->sectorsize, ss)); 610 SET32(meta, hdr->vdr_length, 611 howmany(GET32(sample, hdr->vdr_length) * 612 sample->sectorsize, ss)); 613 SET32(meta, hdr->cr_length, 614 howmany(GET32(sample, hdr->cr_length) * 615 sample->sectorsize, ss)); 616 SET32(meta, hdr->pdd_length, 617 howmany(GET32(sample, hdr->pdd_length) * 618 sample->sectorsize, ss)); 619 SET32(meta, hdr->bbmlog_length, 620 howmany(GET32(sample, hdr->bbmlog_length) * 621 sample->sectorsize, ss)); 622 SET32(meta, hdr->Diagnostic_Space, 623 howmany(GET32(sample, hdr->bbmlog_length) * 624 sample->sectorsize, ss)); 625 SET32(meta, hdr->Vendor_Specific_Logs, 626 howmany(GET32(sample, hdr->bbmlog_length) * 627 sample->sectorsize, ss)); 628 } 629 } else { 630 SET32(meta, hdr->Signature, DDF_HEADER_SIGNATURE); 631 snprintf(meta->hdr->DDF_Header_GUID, 25, "FreeBSD %08x%08x", 632 (u_int)(ts.tv_sec - DECADE), arc4random()); 633 memcpy(meta->hdr->DDF_rev, "02.00.00", 8); 634 SET32(meta, hdr->TimeStamp, (ts.tv_sec - DECADE)); 635 SET32(meta, hdr->WorkSpace_Length, 16 * 1024 * 1024 / ss); 636 SET16(meta, hdr->Max_PD_Entries, DDF_MAX_DISKS - 1); 637 SET16(meta, hdr->Max_VD_Entries, DDF_MAX_VDISKS); 638 SET16(meta, hdr->Max_Partitions, DDF_MAX_PARTITIONS); 639 SET16(meta, hdr->Max_Primary_Element_Entries, DDF_MAX_DISKS); 640 SET16(meta, hdr->Configuration_Record_Length, 641 howmany(sizeof(struct ddf_vdc_record) + (4 + 8) * 642 GET16(meta, hdr->Max_Primary_Element_Entries), ss)); 643 SET32(meta, hdr->cd_length, 644 howmany(sizeof(struct ddf_cd_record), ss)); 645 SET32(meta, hdr->pdr_length, 646 howmany(sizeof(struct ddf_pd_record) + 647 sizeof(struct ddf_pd_entry) * GET16(meta, 648 hdr->Max_PD_Entries), ss)); 649 SET32(meta, hdr->vdr_length, 650 howmany(sizeof(struct ddf_vd_record) + 651 sizeof(struct ddf_vd_entry) * 652 GET16(meta, hdr->Max_VD_Entries), ss)); 653 SET32(meta, hdr->cr_length, 654 GET16(meta, hdr->Configuration_Record_Length) * 655 (GET16(meta, hdr->Max_Partitions) + 1)); 656 SET32(meta, hdr->pdd_length, 657 howmany(sizeof(struct ddf_pdd_record), ss)); 658 SET32(meta, hdr->bbmlog_length, 0); 659 SET32(meta, hdr->Diagnostic_Space_Length, 0); 660 SET32(meta, hdr->Vendor_Specific_Logs_Length, 0); 661 } 662 pos = 1; 663 SET32(meta, hdr->cd_section, pos); 664 pos += GET32(meta, hdr->cd_length); 665 SET32(meta, hdr->pdr_section, pos); 666 pos += GET32(meta, hdr->pdr_length); 667 SET32(meta, hdr->vdr_section, pos); 668 pos += GET32(meta, hdr->vdr_length); 669 SET32(meta, hdr->cr_section, pos); 670 pos += GET32(meta, hdr->cr_length); 671 SET32(meta, hdr->pdd_section, pos); 672 pos += GET32(meta, hdr->pdd_length); 673 SET32(meta, hdr->bbmlog_section, 674 GET32(meta, hdr->bbmlog_length) != 0 ? pos : 0xffffffff); 675 pos += GET32(meta, hdr->bbmlog_length); 676 SET32(meta, hdr->Diagnostic_Space, 677 GET32(meta, hdr->Diagnostic_Space_Length) != 0 ? pos : 0xffffffff); 678 pos += GET32(meta, hdr->Diagnostic_Space_Length); 679 SET32(meta, hdr->Vendor_Specific_Logs, 680 GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff); 681 pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1); 682 SET64(meta, hdr->Primary_Header_LBA, 683 anchorlba - pos); 684 SET64(meta, hdr->Secondary_Header_LBA, 685 0xffffffffffffffffULL); 686 SET64(meta, hdr->WorkSpace_LBA, 687 anchorlba + 1 - 32 * 1024 * 1024 / ss); 688 689 /* Controller Data */ 690 size = GET32(meta, hdr->cd_length) * ss; 691 meta->cdr = malloc(size, M_MD_DDF, M_WAITOK); 692 memset(meta->cdr, 0xff, size); 693 SET32(meta, cdr->Signature, DDF_CONTROLLER_DATA_SIGNATURE); 694 memcpy(meta->cdr->Controller_GUID, "FreeBSD GEOM RAID SERIAL", 24); 695 memcpy(meta->cdr->Product_ID, "FreeBSD GEOMRAID", 16); 696 697 /* Physical Drive Records. */ 698 size = GET32(meta, hdr->pdr_length) * ss; 699 meta->pdr = malloc(size, M_MD_DDF, M_WAITOK); 700 memset(meta->pdr, 0xff, size); 701 SET32(meta, pdr->Signature, DDF_PDR_SIGNATURE); 702 SET16(meta, pdr->Populated_PDEs, 1); 703 SET16(meta, pdr->Max_PDE_Supported, 704 GET16(meta, hdr->Max_PD_Entries)); 705 706 pde = &meta->pdr->entry[0]; 707 len = sizeof(serial_buffer); 708 error = g_io_getattr("GEOM::ident", disk->d_consumer, &len, serial_buffer); 709 if (error == 0 && (len = strlen (serial_buffer)) >= 6 && len <= 20) 710 snprintf(pde->PD_GUID, 25, "DISK%20s", serial_buffer); 711 else 712 snprintf(pde->PD_GUID, 25, "DISK%04d%02d%02d%08x%04x", 713 ct.year, ct.mon, ct.day, 714 arc4random(), arc4random() & 0xffff); 715 SET32D(meta, pde->PD_Reference, arc4random()); 716 SET16D(meta, pde->PD_Type, DDF_PDE_GUID_FORCE); 717 SET16D(meta, pde->PD_State, 0); 718 SET64D(meta, pde->Configured_Size, 719 anchorlba + 1 - 32 * 1024 * 1024 / ss); 720 SET16D(meta, pde->Block_Size, ss); 721 722 /* Virtual Drive Records. */ 723 size = GET32(meta, hdr->vdr_length) * ss; 724 meta->vdr = malloc(size, M_MD_DDF, M_WAITOK); 725 memset(meta->vdr, 0xff, size); 726 SET32(meta, vdr->Signature, DDF_VD_RECORD_SIGNATURE); 727 SET32(meta, vdr->Populated_VDEs, 0); 728 SET16(meta, vdr->Max_VDE_Supported, 729 GET16(meta, hdr->Max_VD_Entries)); 730 731 /* Configuration Records. */ 732 size = GET32(meta, hdr->cr_length) * ss; 733 meta->cr = malloc(size, M_MD_DDF, M_WAITOK); 734 memset(meta->cr, 0xff, size); 735 736 /* Physical Disk Data. */ 737 size = GET32(meta, hdr->pdd_length) * ss; 738 meta->pdd = malloc(size, M_MD_DDF, M_WAITOK); 739 memset(meta->pdd, 0xff, size); 740 SET32(meta, pdd->Signature, DDF_PDD_SIGNATURE); 741 memcpy(meta->pdd->PD_GUID, pde->PD_GUID, 24); 742 SET32(meta, pdd->PD_Reference, GET32D(meta, pde->PD_Reference)); 743 SET8(meta, pdd->Forced_Ref_Flag, DDF_PDD_FORCED_REF); 744 SET8(meta, pdd->Forced_PD_GUID_Flag, DDF_PDD_FORCED_GUID); 745 746 /* Bad Block Management Log. */ 747 if (GET32(meta, hdr->bbmlog_length) != 0) { 748 size = GET32(meta, hdr->bbmlog_length) * ss; 749 meta->bbm = malloc(size, M_MD_DDF, M_WAITOK); 750 memset(meta->bbm, 0xff, size); 751 SET32(meta, bbm->Signature, DDF_BBML_SIGNATURE); 752 SET32(meta, bbm->Entry_Count, 0); 753 SET32(meta, bbm->Spare_Block_Count, 0); 754 } 755 } 756 757 static void 758 ddf_meta_copy(struct ddf_meta *dst, struct ddf_meta *src) 759 { 760 struct ddf_header *hdr; 761 u_int ss; 762 763 hdr = src->hdr; 764 dst->bigendian = src->bigendian; 765 ss = dst->sectorsize = src->sectorsize; 766 dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 767 memcpy(dst->hdr, src->hdr, ss); 768 dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 769 memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss); 770 dst->pdr = malloc(GET32(src, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK); 771 memcpy(dst->pdr, src->pdr, GET32(src, hdr->pdr_length) * ss); 772 dst->vdr = malloc(GET32(src, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK); 773 memcpy(dst->vdr, src->vdr, GET32(src, hdr->vdr_length) * ss); 774 dst->cr = malloc(GET32(src, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK); 775 memcpy(dst->cr, src->cr, GET32(src, hdr->cr_length) * ss); 776 dst->pdd = malloc(GET32(src, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK); 777 memcpy(dst->pdd, src->pdd, GET32(src, hdr->pdd_length) * ss); 778 if (src->bbm != NULL) { 779 dst->bbm = malloc(GET32(src, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK); 780 memcpy(dst->bbm, src->bbm, GET32(src, hdr->bbmlog_length) * ss); 781 } 782 } 783 784 static void 785 ddf_meta_update(struct ddf_meta *meta, struct ddf_meta *src) 786 { 787 struct ddf_pd_entry *pde, *spde; 788 int i, j; 789 790 for (i = 0; i < GET16(src, pdr->Populated_PDEs); i++) { 791 spde = &src->pdr->entry[i]; 792 if (isff(spde->PD_GUID, 24)) 793 continue; 794 j = ddf_meta_find_pd(meta, NULL, 795 GET32(src, pdr->entry[i].PD_Reference)); 796 if (j < 0) { 797 j = ddf_meta_find_pd(meta, NULL, 0xffffffff); 798 pde = &meta->pdr->entry[j]; 799 memcpy(pde, spde, sizeof(*pde)); 800 } else { 801 pde = &meta->pdr->entry[j]; 802 SET16D(meta, pde->PD_State, 803 GET16D(meta, pde->PD_State) | 804 GET16D(src, pde->PD_State)); 805 } 806 } 807 } 808 809 static void 810 ddf_meta_free(struct ddf_meta *meta) 811 { 812 813 if (meta->hdr != NULL) { 814 free(meta->hdr, M_MD_DDF); 815 meta->hdr = NULL; 816 } 817 if (meta->cdr != NULL) { 818 free(meta->cdr, M_MD_DDF); 819 meta->cdr = NULL; 820 } 821 if (meta->pdr != NULL) { 822 free(meta->pdr, M_MD_DDF); 823 meta->pdr = NULL; 824 } 825 if (meta->vdr != NULL) { 826 free(meta->vdr, M_MD_DDF); 827 meta->vdr = NULL; 828 } 829 if (meta->cr != NULL) { 830 free(meta->cr, M_MD_DDF); 831 meta->cr = NULL; 832 } 833 if (meta->pdd != NULL) { 834 free(meta->pdd, M_MD_DDF); 835 meta->pdd = NULL; 836 } 837 if (meta->bbm != NULL) { 838 free(meta->bbm, M_MD_DDF); 839 meta->bbm = NULL; 840 } 841 } 842 843 static void 844 ddf_vol_meta_create(struct ddf_vol_meta *meta, struct ddf_meta *sample) 845 { 846 struct timespec ts; 847 struct clocktime ct; 848 struct ddf_header *hdr; 849 u_int ss, size; 850 851 hdr = sample->hdr; 852 meta->bigendian = sample->bigendian; 853 ss = meta->sectorsize = sample->sectorsize; 854 meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 855 memcpy(meta->hdr, sample->hdr, ss); 856 meta->cdr = malloc(GET32(sample, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 857 memcpy(meta->cdr, sample->cdr, GET32(sample, hdr->cd_length) * ss); 858 meta->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK); 859 memset(meta->vde, 0xff, sizeof(struct ddf_vd_entry)); 860 getnanotime(&ts); 861 clock_ts_to_ct(&ts, &ct); 862 snprintf(meta->vde->VD_GUID, 25, "FreeBSD%04d%02d%02d%08x%01x", 863 ct.year, ct.mon, ct.day, 864 arc4random(), arc4random() & 0xf); 865 size = GET16(sample, hdr->Configuration_Record_Length) * ss; 866 meta->vdc = malloc(size, M_MD_DDF, M_WAITOK); 867 memset(meta->vdc, 0xff, size); 868 SET32(meta, vdc->Signature, DDF_VDCR_SIGNATURE); 869 memcpy(meta->vdc->VD_GUID, meta->vde->VD_GUID, 24); 870 SET32(meta, vdc->Sequence_Number, 0); 871 } 872 873 static void 874 ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src, 875 uint8_t *GUID, int started) 876 { 877 struct ddf_header *hdr; 878 struct ddf_vd_entry *vde; 879 struct ddf_vdc_record *vdc; 880 int vnew, bvnew, bvd, size; 881 u_int ss; 882 883 hdr = src->hdr; 884 vde = &src->vdr->entry[ddf_meta_find_vd(src, GUID)]; 885 vdc = ddf_meta_find_vdc(src, GUID); 886 if (GET8D(src, vdc->Secondary_Element_Count) == 1) 887 bvd = 0; 888 else 889 bvd = GET8D(src, vdc->Secondary_Element_Seq); 890 size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize; 891 892 if (dst->vdc == NULL || 893 (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) - 894 GET32(dst, vdc->Sequence_Number))) > 0)) 895 vnew = 1; 896 else 897 vnew = 0; 898 899 if (dst->bvdc[bvd] == NULL || 900 (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) - 901 GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)) 902 bvnew = 1; 903 else 904 bvnew = 0; 905 906 if (vnew) { 907 dst->bigendian = src->bigendian; 908 ss = dst->sectorsize = src->sectorsize; 909 if (dst->hdr != NULL) 910 free(dst->hdr, M_MD_DDF); 911 dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 912 memcpy(dst->hdr, src->hdr, ss); 913 if (dst->cdr != NULL) 914 free(dst->cdr, M_MD_DDF); 915 dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 916 memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss); 917 if (dst->vde != NULL) 918 free(dst->vde, M_MD_DDF); 919 dst->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK); 920 memcpy(dst->vde, vde, sizeof(struct ddf_vd_entry)); 921 if (dst->vdc != NULL) 922 free(dst->vdc, M_MD_DDF); 923 dst->vdc = malloc(size, M_MD_DDF, M_WAITOK); 924 memcpy(dst->vdc, vdc, size); 925 } 926 if (bvnew) { 927 if (dst->bvdc[bvd] != NULL) 928 free(dst->bvdc[bvd], M_MD_DDF); 929 dst->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK); 930 memcpy(dst->bvdc[bvd], vdc, size); 931 } 932 } 933 934 static void 935 ddf_vol_meta_free(struct ddf_vol_meta *meta) 936 { 937 int i; 938 939 if (meta->hdr != NULL) { 940 free(meta->hdr, M_MD_DDF); 941 meta->hdr = NULL; 942 } 943 if (meta->cdr != NULL) { 944 free(meta->cdr, M_MD_DDF); 945 meta->cdr = NULL; 946 } 947 if (meta->vde != NULL) { 948 free(meta->vde, M_MD_DDF); 949 meta->vde = NULL; 950 } 951 if (meta->vdc != NULL) { 952 free(meta->vdc, M_MD_DDF); 953 meta->vdc = NULL; 954 } 955 for (i = 0; i < DDF_MAX_DISKS_HARD; i++) { 956 if (meta->bvdc[i] != NULL) { 957 free(meta->bvdc[i], M_MD_DDF); 958 meta->bvdc[i] = NULL; 959 } 960 } 961 } 962 963 static int 964 ddf_meta_unused_range(struct ddf_meta *meta, off_t *off, off_t *size) 965 { 966 struct ddf_vdc_record *vdc; 967 off_t beg[32], end[32], beg1, end1; 968 uint64_t *offp; 969 int i, j, n, num, pos; 970 uint32_t ref; 971 972 *off = 0; 973 *size = 0; 974 ref = GET32(meta, pdd->PD_Reference); 975 pos = ddf_meta_find_pd(meta, NULL, ref); 976 beg[0] = 0; 977 end[0] = GET64(meta, pdr->entry[pos].Configured_Size); 978 n = 1; 979 num = GETCRNUM(meta); 980 for (i = 0; i < num; i++) { 981 vdc = GETVDCPTR(meta, i); 982 if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE) 983 continue; 984 for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++) 985 if (GET32D(meta, vdc->Physical_Disk_Sequence[pos]) == ref) 986 break; 987 if (pos == GET16D(meta, vdc->Primary_Element_Count)) 988 continue; 989 offp = (uint64_t *)&(vdc->Physical_Disk_Sequence[ 990 GET16(meta, hdr->Max_Primary_Element_Entries)]); 991 beg1 = GET64P(meta, offp + pos); 992 end1 = beg1 + GET64D(meta, vdc->Block_Count); 993 for (j = 0; j < n; j++) { 994 if (beg[j] >= end1 || end[j] <= beg1 ) 995 continue; 996 if (beg[j] < beg1 && end[j] > end1) { 997 beg[n] = end1; 998 end[n] = end[j]; 999 end[j] = beg1; 1000 n++; 1001 } else if (beg[j] < beg1) 1002 end[j] = beg1; 1003 else 1004 beg[j] = end1; 1005 } 1006 } 1007 for (j = 0; j < n; j++) { 1008 if (end[j] - beg[j] > *size) { 1009 *off = beg[j]; 1010 *size = end[j] - beg[j]; 1011 } 1012 } 1013 return ((*size > 0) ? 1 : 0); 1014 } 1015 1016 static void 1017 ddf_meta_get_name(struct ddf_meta *meta, int num, char *buf) 1018 { 1019 const char *b; 1020 int i; 1021 1022 b = meta->vdr->entry[num].VD_Name; 1023 for (i = 15; i >= 0; i--) 1024 if (b[i] != 0x20) 1025 break; 1026 memcpy(buf, b, i + 1); 1027 buf[i + 1] = 0; 1028 } 1029 1030 static void 1031 ddf_meta_put_name(struct ddf_vol_meta *meta, char *buf) 1032 { 1033 int len; 1034 1035 len = min(strlen(buf), 16); 1036 memset(meta->vde->VD_Name, 0x20, 16); 1037 memcpy(meta->vde->VD_Name, buf, len); 1038 } 1039 1040 static int 1041 ddf_meta_read(struct g_consumer *cp, struct ddf_meta *meta) 1042 { 1043 struct g_provider *pp; 1044 struct ddf_header *ahdr, *hdr; 1045 char *abuf, *buf; 1046 off_t plba, slba, lba; 1047 int error, len, i; 1048 u_int ss; 1049 uint32_t val; 1050 1051 ddf_meta_free(meta); 1052 pp = cp->provider; 1053 ss = meta->sectorsize = pp->sectorsize; 1054 /* Read anchor block. */ 1055 abuf = g_read_data(cp, pp->mediasize - ss, ss, &error); 1056 if (abuf == NULL) { 1057 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 1058 pp->name, error); 1059 return (error); 1060 } 1061 ahdr = (struct ddf_header *)abuf; 1062 1063 /* Check if this is an DDF RAID struct */ 1064 if (be32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE) 1065 meta->bigendian = 1; 1066 else if (le32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE) 1067 meta->bigendian = 0; 1068 else { 1069 G_RAID_DEBUG(1, "DDF signature check failed on %s", pp->name); 1070 error = EINVAL; 1071 goto done; 1072 } 1073 if (ahdr->Header_Type != DDF_HEADER_ANCHOR) { 1074 G_RAID_DEBUG(1, "DDF header type check failed on %s", pp->name); 1075 error = EINVAL; 1076 goto done; 1077 } 1078 meta->hdr = ahdr; 1079 plba = GET64(meta, hdr->Primary_Header_LBA); 1080 slba = GET64(meta, hdr->Secondary_Header_LBA); 1081 val = GET32(meta, hdr->CRC); 1082 SET32(meta, hdr->CRC, 0xffffffff); 1083 meta->hdr = NULL; 1084 if (crc32(ahdr, ss) != val) { 1085 G_RAID_DEBUG(1, "DDF CRC mismatch on %s", pp->name); 1086 error = EINVAL; 1087 goto done; 1088 } 1089 if ((plba + 6) * ss >= pp->mediasize) { 1090 G_RAID_DEBUG(1, "DDF primary header LBA is wrong on %s", pp->name); 1091 error = EINVAL; 1092 goto done; 1093 } 1094 if (slba != -1 && (slba + 6) * ss >= pp->mediasize) { 1095 G_RAID_DEBUG(1, "DDF secondary header LBA is wrong on %s", pp->name); 1096 error = EINVAL; 1097 goto done; 1098 } 1099 lba = plba; 1100 1101 doread: 1102 error = 0; 1103 ddf_meta_free(meta); 1104 1105 /* Read header block. */ 1106 buf = g_read_data(cp, lba * ss, ss, &error); 1107 if (buf == NULL) { 1108 readerror: 1109 G_RAID_DEBUG(1, "DDF %s metadata read error on %s (error=%d).", 1110 (lba == plba) ? "primary" : "secondary", pp->name, error); 1111 if (lba == plba && slba != -1) { 1112 lba = slba; 1113 goto doread; 1114 } 1115 G_RAID_DEBUG(1, "DDF metadata read error on %s.", pp->name); 1116 goto done; 1117 } 1118 meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 1119 memcpy(meta->hdr, buf, ss); 1120 g_free(buf); 1121 hdr = meta->hdr; 1122 val = GET32(meta, hdr->CRC); 1123 SET32(meta, hdr->CRC, 0xffffffff); 1124 if (hdr->Signature != ahdr->Signature || 1125 crc32(meta->hdr, ss) != val || 1126 memcmp(hdr->DDF_Header_GUID, ahdr->DDF_Header_GUID, 24) || 1127 GET64(meta, hdr->Primary_Header_LBA) != plba || 1128 GET64(meta, hdr->Secondary_Header_LBA) != slba) { 1129 hdrerror: 1130 G_RAID_DEBUG(1, "DDF %s metadata check failed on %s", 1131 (lba == plba) ? "primary" : "secondary", pp->name); 1132 if (lba == plba && slba != -1) { 1133 lba = slba; 1134 goto doread; 1135 } 1136 G_RAID_DEBUG(1, "DDF metadata check failed on %s", pp->name); 1137 error = EINVAL; 1138 goto done; 1139 } 1140 if ((lba == plba && hdr->Header_Type != DDF_HEADER_PRIMARY) || 1141 (lba == slba && hdr->Header_Type != DDF_HEADER_SECONDARY)) 1142 goto hdrerror; 1143 len = 1; 1144 len = max(len, GET32(meta, hdr->cd_section) + GET32(meta, hdr->cd_length)); 1145 len = max(len, GET32(meta, hdr->pdr_section) + GET32(meta, hdr->pdr_length)); 1146 len = max(len, GET32(meta, hdr->vdr_section) + GET32(meta, hdr->vdr_length)); 1147 len = max(len, GET32(meta, hdr->cr_section) + GET32(meta, hdr->cr_length)); 1148 len = max(len, GET32(meta, hdr->pdd_section) + GET32(meta, hdr->pdd_length)); 1149 if ((val = GET32(meta, hdr->bbmlog_section)) != 0xffffffff) 1150 len = max(len, val + GET32(meta, hdr->bbmlog_length)); 1151 if ((val = GET32(meta, hdr->Diagnostic_Space)) != 0xffffffff) 1152 len = max(len, val + GET32(meta, hdr->Diagnostic_Space_Length)); 1153 if ((val = GET32(meta, hdr->Vendor_Specific_Logs)) != 0xffffffff) 1154 len = max(len, val + GET32(meta, hdr->Vendor_Specific_Logs_Length)); 1155 if ((plba + len) * ss >= pp->mediasize) 1156 goto hdrerror; 1157 if (slba != -1 && (slba + len) * ss >= pp->mediasize) 1158 goto hdrerror; 1159 /* Workaround for Adaptec implementation. */ 1160 if (GET16(meta, hdr->Max_Primary_Element_Entries) == 0xffff) { 1161 SET16(meta, hdr->Max_Primary_Element_Entries, 1162 min(GET16(meta, hdr->Max_PD_Entries), 1163 (GET16(meta, hdr->Configuration_Record_Length) * ss - 512) / 12)); 1164 } 1165 1166 if (GET32(meta, hdr->cd_length) * ss >= MAXPHYS || 1167 GET32(meta, hdr->pdr_length) * ss >= MAXPHYS || 1168 GET32(meta, hdr->vdr_length) * ss >= MAXPHYS || 1169 GET32(meta, hdr->cr_length) * ss >= MAXPHYS || 1170 GET32(meta, hdr->pdd_length) * ss >= MAXPHYS || 1171 GET32(meta, hdr->bbmlog_length) * ss >= MAXPHYS) { 1172 G_RAID_DEBUG(1, "%s: Blocksize is too big.", pp->name); 1173 goto hdrerror; 1174 } 1175 1176 /* Read controller data. */ 1177 buf = g_read_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss, 1178 GET32(meta, hdr->cd_length) * ss, &error); 1179 if (buf == NULL) 1180 goto readerror; 1181 meta->cdr = malloc(GET32(meta, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 1182 memcpy(meta->cdr, buf, GET32(meta, hdr->cd_length) * ss); 1183 g_free(buf); 1184 if (GET32(meta, cdr->Signature) != DDF_CONTROLLER_DATA_SIGNATURE) 1185 goto hdrerror; 1186 1187 /* Read physical disk records. */ 1188 buf = g_read_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss, 1189 GET32(meta, hdr->pdr_length) * ss, &error); 1190 if (buf == NULL) 1191 goto readerror; 1192 meta->pdr = malloc(GET32(meta, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK); 1193 memcpy(meta->pdr, buf, GET32(meta, hdr->pdr_length) * ss); 1194 g_free(buf); 1195 if (GET32(meta, pdr->Signature) != DDF_PDR_SIGNATURE) 1196 goto hdrerror; 1197 /* 1198 * Workaround for reading metadata corrupted due to graid bug. 1199 * XXX: Remove this before we have disks above 128PB. :) 1200 */ 1201 if (meta->bigendian) { 1202 for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) { 1203 if (isff(meta->pdr->entry[i].PD_GUID, 24)) 1204 continue; 1205 if (GET32(meta, pdr->entry[i].PD_Reference) == 1206 0xffffffff) 1207 continue; 1208 if (GET64(meta, pdr->entry[i].Configured_Size) >= 1209 (1ULL << 48)) { 1210 SET16(meta, pdr->entry[i].PD_State, 1211 GET16(meta, pdr->entry[i].PD_State) & 1212 ~DDF_PDE_FAILED); 1213 SET64(meta, pdr->entry[i].Configured_Size, 1214 GET64(meta, pdr->entry[i].Configured_Size) & 1215 ((1ULL << 48) - 1)); 1216 } 1217 } 1218 } 1219 1220 /* Read virtual disk records. */ 1221 buf = g_read_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss, 1222 GET32(meta, hdr->vdr_length) * ss, &error); 1223 if (buf == NULL) 1224 goto readerror; 1225 meta->vdr = malloc(GET32(meta, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK); 1226 memcpy(meta->vdr, buf, GET32(meta, hdr->vdr_length) * ss); 1227 g_free(buf); 1228 if (GET32(meta, vdr->Signature) != DDF_VD_RECORD_SIGNATURE) 1229 goto hdrerror; 1230 1231 /* Read configuration records. */ 1232 buf = g_read_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss, 1233 GET32(meta, hdr->cr_length) * ss, &error); 1234 if (buf == NULL) 1235 goto readerror; 1236 meta->cr = malloc(GET32(meta, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK); 1237 memcpy(meta->cr, buf, GET32(meta, hdr->cr_length) * ss); 1238 g_free(buf); 1239 1240 /* Read physical disk data. */ 1241 buf = g_read_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss, 1242 GET32(meta, hdr->pdd_length) * ss, &error); 1243 if (buf == NULL) 1244 goto readerror; 1245 meta->pdd = malloc(GET32(meta, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK); 1246 memcpy(meta->pdd, buf, GET32(meta, hdr->pdd_length) * ss); 1247 g_free(buf); 1248 if (GET32(meta, pdd->Signature) != DDF_PDD_SIGNATURE) 1249 goto hdrerror; 1250 i = ddf_meta_find_pd(meta, NULL, GET32(meta, pdd->PD_Reference)); 1251 if (i < 0) 1252 goto hdrerror; 1253 1254 /* Read BBM Log. */ 1255 if (GET32(meta, hdr->bbmlog_section) != 0xffffffff && 1256 GET32(meta, hdr->bbmlog_length) != 0) { 1257 buf = g_read_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss, 1258 GET32(meta, hdr->bbmlog_length) * ss, &error); 1259 if (buf == NULL) 1260 goto readerror; 1261 meta->bbm = malloc(GET32(meta, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK); 1262 memcpy(meta->bbm, buf, GET32(meta, hdr->bbmlog_length) * ss); 1263 g_free(buf); 1264 if (GET32(meta, bbm->Signature) != DDF_BBML_SIGNATURE) 1265 goto hdrerror; 1266 } 1267 1268 done: 1269 g_free(abuf); 1270 if (error != 0) 1271 ddf_meta_free(meta); 1272 return (error); 1273 } 1274 1275 static int 1276 ddf_meta_write(struct g_consumer *cp, struct ddf_meta *meta) 1277 { 1278 struct g_provider *pp; 1279 struct ddf_vdc_record *vdc; 1280 off_t alba, plba, slba, lba; 1281 u_int ss, size; 1282 int error, i, num; 1283 1284 pp = cp->provider; 1285 ss = pp->sectorsize; 1286 lba = alba = pp->mediasize / ss - 1; 1287 plba = GET64(meta, hdr->Primary_Header_LBA); 1288 slba = GET64(meta, hdr->Secondary_Header_LBA); 1289 1290 next: 1291 SET8(meta, hdr->Header_Type, (lba == alba) ? DDF_HEADER_ANCHOR : 1292 (lba == plba) ? DDF_HEADER_PRIMARY : DDF_HEADER_SECONDARY); 1293 SET32(meta, hdr->CRC, 0xffffffff); 1294 SET32(meta, hdr->CRC, crc32(meta->hdr, ss)); 1295 error = g_write_data(cp, lba * ss, meta->hdr, ss); 1296 if (error != 0) { 1297 err: 1298 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 1299 pp->name, error); 1300 if (lba != alba) 1301 goto done; 1302 } 1303 if (lba == alba) { 1304 lba = plba; 1305 goto next; 1306 } 1307 1308 size = GET32(meta, hdr->cd_length) * ss; 1309 SET32(meta, cdr->CRC, 0xffffffff); 1310 SET32(meta, cdr->CRC, crc32(meta->cdr, size)); 1311 error = g_write_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss, 1312 meta->cdr, size); 1313 if (error != 0) 1314 goto err; 1315 1316 size = GET32(meta, hdr->pdr_length) * ss; 1317 SET32(meta, pdr->CRC, 0xffffffff); 1318 SET32(meta, pdr->CRC, crc32(meta->pdr, size)); 1319 error = g_write_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss, 1320 meta->pdr, size); 1321 if (error != 0) 1322 goto err; 1323 1324 size = GET32(meta, hdr->vdr_length) * ss; 1325 SET32(meta, vdr->CRC, 0xffffffff); 1326 SET32(meta, vdr->CRC, crc32(meta->vdr, size)); 1327 error = g_write_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss, 1328 meta->vdr, size); 1329 if (error != 0) 1330 goto err; 1331 1332 size = GET16(meta, hdr->Configuration_Record_Length) * ss; 1333 num = GETCRNUM(meta); 1334 for (i = 0; i < num; i++) { 1335 vdc = GETVDCPTR(meta, i); 1336 SET32D(meta, vdc->CRC, 0xffffffff); 1337 SET32D(meta, vdc->CRC, crc32(vdc, size)); 1338 } 1339 error = g_write_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss, 1340 meta->cr, size * num); 1341 if (error != 0) 1342 goto err; 1343 1344 size = GET32(meta, hdr->pdd_length) * ss; 1345 SET32(meta, pdd->CRC, 0xffffffff); 1346 SET32(meta, pdd->CRC, crc32(meta->pdd, size)); 1347 error = g_write_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss, 1348 meta->pdd, size); 1349 if (error != 0) 1350 goto err; 1351 1352 if (GET32(meta, hdr->bbmlog_length) != 0) { 1353 size = GET32(meta, hdr->bbmlog_length) * ss; 1354 SET32(meta, bbm->CRC, 0xffffffff); 1355 SET32(meta, bbm->CRC, crc32(meta->bbm, size)); 1356 error = g_write_data(cp, 1357 (lba + GET32(meta, hdr->bbmlog_section)) * ss, 1358 meta->bbm, size); 1359 if (error != 0) 1360 goto err; 1361 } 1362 1363 done: 1364 if (lba == plba && slba != -1) { 1365 lba = slba; 1366 goto next; 1367 } 1368 1369 return (error); 1370 } 1371 1372 static int 1373 ddf_meta_erase(struct g_consumer *cp) 1374 { 1375 struct g_provider *pp; 1376 char *buf; 1377 int error; 1378 1379 pp = cp->provider; 1380 buf = malloc(pp->sectorsize, M_MD_DDF, M_WAITOK | M_ZERO); 1381 error = g_write_data(cp, pp->mediasize - pp->sectorsize, 1382 buf, pp->sectorsize); 1383 if (error != 0) { 1384 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", 1385 pp->name, error); 1386 } 1387 free(buf, M_MD_DDF); 1388 return (error); 1389 } 1390 1391 static struct g_raid_volume * 1392 g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID) 1393 { 1394 struct g_raid_volume *vol; 1395 struct g_raid_md_ddf_pervolume *pv; 1396 1397 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1398 pv = vol->v_md_data; 1399 if (memcmp(pv->pv_meta.vde->VD_GUID, GUID, 24) == 0) 1400 break; 1401 } 1402 return (vol); 1403 } 1404 1405 static struct g_raid_disk * 1406 g_raid_md_ddf_get_disk(struct g_raid_softc *sc, uint8_t *GUID, uint32_t id) 1407 { 1408 struct g_raid_disk *disk; 1409 struct g_raid_md_ddf_perdisk *pd; 1410 struct ddf_meta *meta; 1411 1412 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1413 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 1414 meta = &pd->pd_meta; 1415 if (GUID != NULL) { 1416 if (memcmp(meta->pdd->PD_GUID, GUID, 24) == 0) 1417 break; 1418 } else { 1419 if (GET32(meta, pdd->PD_Reference) == id) 1420 break; 1421 } 1422 } 1423 return (disk); 1424 } 1425 1426 static int 1427 g_raid_md_ddf_purge_volumes(struct g_raid_softc *sc) 1428 { 1429 struct g_raid_volume *vol, *tvol; 1430 struct g_raid_md_ddf_pervolume *pv; 1431 int i, res; 1432 1433 res = 0; 1434 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { 1435 pv = vol->v_md_data; 1436 if (vol->v_stopping) 1437 continue; 1438 for (i = 0; i < vol->v_disks_count; i++) { 1439 if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) 1440 break; 1441 } 1442 if (i >= vol->v_disks_count) { 1443 g_raid_destroy_volume(vol); 1444 res = 1; 1445 } 1446 } 1447 return (res); 1448 } 1449 1450 static int 1451 g_raid_md_ddf_purge_disks(struct g_raid_softc *sc) 1452 { 1453 #if 0 1454 struct g_raid_disk *disk, *tdisk; 1455 struct g_raid_volume *vol; 1456 struct g_raid_md_ddf_perdisk *pd; 1457 int i, j, res; 1458 1459 res = 0; 1460 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 1461 if (disk->d_state == G_RAID_DISK_S_SPARE) 1462 continue; 1463 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 1464 1465 /* Scan for deleted volumes. */ 1466 for (i = 0; i < pd->pd_subdisks; ) { 1467 vol = g_raid_md_ddf_get_volume(sc, 1468 pd->pd_meta[i]->volume_id); 1469 if (vol != NULL && !vol->v_stopping) { 1470 i++; 1471 continue; 1472 } 1473 free(pd->pd_meta[i], M_MD_DDF); 1474 for (j = i; j < pd->pd_subdisks - 1; j++) 1475 pd->pd_meta[j] = pd->pd_meta[j + 1]; 1476 pd->pd_meta[DDF_MAX_SUBDISKS - 1] = NULL; 1477 pd->pd_subdisks--; 1478 pd->pd_updated = 1; 1479 } 1480 1481 /* If there is no metadata left - erase and delete disk. */ 1482 if (pd->pd_subdisks == 0) { 1483 ddf_meta_erase(disk->d_consumer); 1484 g_raid_destroy_disk(disk); 1485 res = 1; 1486 } 1487 } 1488 return (res); 1489 #endif 1490 return (0); 1491 } 1492 1493 static int 1494 g_raid_md_ddf_supported(int level, int qual, int disks, int force) 1495 { 1496 1497 if (disks > DDF_MAX_DISKS_HARD) 1498 return (0); 1499 switch (level) { 1500 case G_RAID_VOLUME_RL_RAID0: 1501 if (qual != G_RAID_VOLUME_RLQ_NONE) 1502 return (0); 1503 if (disks < 1) 1504 return (0); 1505 if (!force && disks < 2) 1506 return (0); 1507 break; 1508 case G_RAID_VOLUME_RL_RAID1: 1509 if (disks < 1) 1510 return (0); 1511 if (qual == G_RAID_VOLUME_RLQ_R1SM) { 1512 if (!force && disks != 2) 1513 return (0); 1514 } else if (qual == G_RAID_VOLUME_RLQ_R1MM) { 1515 if (!force && disks != 3) 1516 return (0); 1517 } else 1518 return (0); 1519 break; 1520 case G_RAID_VOLUME_RL_RAID3: 1521 if (qual != G_RAID_VOLUME_RLQ_R3P0 && 1522 qual != G_RAID_VOLUME_RLQ_R3PN) 1523 return (0); 1524 if (disks < 3) 1525 return (0); 1526 break; 1527 case G_RAID_VOLUME_RL_RAID4: 1528 if (qual != G_RAID_VOLUME_RLQ_R4P0 && 1529 qual != G_RAID_VOLUME_RLQ_R4PN) 1530 return (0); 1531 if (disks < 3) 1532 return (0); 1533 break; 1534 case G_RAID_VOLUME_RL_RAID5: 1535 if (qual != G_RAID_VOLUME_RLQ_R5RA && 1536 qual != G_RAID_VOLUME_RLQ_R5RS && 1537 qual != G_RAID_VOLUME_RLQ_R5LA && 1538 qual != G_RAID_VOLUME_RLQ_R5LS) 1539 return (0); 1540 if (disks < 3) 1541 return (0); 1542 break; 1543 case G_RAID_VOLUME_RL_RAID6: 1544 if (qual != G_RAID_VOLUME_RLQ_R6RA && 1545 qual != G_RAID_VOLUME_RLQ_R6RS && 1546 qual != G_RAID_VOLUME_RLQ_R6LA && 1547 qual != G_RAID_VOLUME_RLQ_R6LS) 1548 return (0); 1549 if (disks < 4) 1550 return (0); 1551 break; 1552 case G_RAID_VOLUME_RL_RAIDMDF: 1553 if (qual != G_RAID_VOLUME_RLQ_RMDFRA && 1554 qual != G_RAID_VOLUME_RLQ_RMDFRS && 1555 qual != G_RAID_VOLUME_RLQ_RMDFLA && 1556 qual != G_RAID_VOLUME_RLQ_RMDFLS) 1557 return (0); 1558 if (disks < 4) 1559 return (0); 1560 break; 1561 case G_RAID_VOLUME_RL_RAID1E: 1562 if (qual != G_RAID_VOLUME_RLQ_R1EA && 1563 qual != G_RAID_VOLUME_RLQ_R1EO) 1564 return (0); 1565 if (disks < 3) 1566 return (0); 1567 break; 1568 case G_RAID_VOLUME_RL_SINGLE: 1569 if (qual != G_RAID_VOLUME_RLQ_NONE) 1570 return (0); 1571 if (disks != 1) 1572 return (0); 1573 break; 1574 case G_RAID_VOLUME_RL_CONCAT: 1575 if (qual != G_RAID_VOLUME_RLQ_NONE) 1576 return (0); 1577 if (disks < 2) 1578 return (0); 1579 break; 1580 case G_RAID_VOLUME_RL_RAID5E: 1581 if (qual != G_RAID_VOLUME_RLQ_R5ERA && 1582 qual != G_RAID_VOLUME_RLQ_R5ERS && 1583 qual != G_RAID_VOLUME_RLQ_R5ELA && 1584 qual != G_RAID_VOLUME_RLQ_R5ELS) 1585 return (0); 1586 if (disks < 4) 1587 return (0); 1588 break; 1589 case G_RAID_VOLUME_RL_RAID5EE: 1590 if (qual != G_RAID_VOLUME_RLQ_R5EERA && 1591 qual != G_RAID_VOLUME_RLQ_R5EERS && 1592 qual != G_RAID_VOLUME_RLQ_R5EELA && 1593 qual != G_RAID_VOLUME_RLQ_R5EELS) 1594 return (0); 1595 if (disks < 4) 1596 return (0); 1597 break; 1598 case G_RAID_VOLUME_RL_RAID5R: 1599 if (qual != G_RAID_VOLUME_RLQ_R5RRA && 1600 qual != G_RAID_VOLUME_RLQ_R5RRS && 1601 qual != G_RAID_VOLUME_RLQ_R5RLA && 1602 qual != G_RAID_VOLUME_RLQ_R5RLS) 1603 return (0); 1604 if (disks < 3) 1605 return (0); 1606 break; 1607 default: 1608 return (0); 1609 } 1610 return (1); 1611 } 1612 1613 static int 1614 g_raid_md_ddf_start_disk(struct g_raid_disk *disk, struct g_raid_volume *vol) 1615 { 1616 struct g_raid_softc *sc; 1617 struct g_raid_subdisk *sd; 1618 struct g_raid_md_ddf_perdisk *pd; 1619 struct g_raid_md_ddf_pervolume *pv; 1620 struct g_raid_md_ddf_object *mdi; 1621 struct ddf_vol_meta *vmeta; 1622 struct ddf_meta *pdmeta, *gmeta; 1623 struct ddf_vdc_record *vdc1; 1624 struct ddf_sa_record *sa; 1625 off_t size, eoff = 0, esize = 0; 1626 uint64_t *val2; 1627 int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos; 1628 int i, resurrection = 0; 1629 uint32_t reference; 1630 1631 sc = disk->d_softc; 1632 mdi = (struct g_raid_md_ddf_object *)sc->sc_md; 1633 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 1634 pdmeta = &pd->pd_meta; 1635 reference = GET32(&pd->pd_meta, pdd->PD_Reference); 1636 1637 pv = vol->v_md_data; 1638 vmeta = &pv->pv_meta; 1639 gmeta = &mdi->mdio_meta; 1640 1641 /* Find disk position in metadata by its reference. */ 1642 disk_pos = ddf_meta_find_disk(vmeta, reference, 1643 &md_disk_bvd, &md_disk_pos); 1644 md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference); 1645 1646 if (disk_pos < 0) { 1647 G_RAID_DEBUG1(1, sc, 1648 "Disk %s is not a present part of the volume %s", 1649 g_raid_get_diskname(disk), vol->v_name); 1650 1651 /* Failed stale disk is useless for us. */ 1652 if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) != 0) { 1653 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); 1654 return (0); 1655 } 1656 1657 /* If disk has some metadata for this volume - erase. */ 1658 if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) 1659 SET32D(pdmeta, vdc1->Signature, 0xffffffff); 1660 1661 /* If we are in the start process, that's all for now. */ 1662 if (!pv->pv_started) 1663 goto nofit; 1664 /* 1665 * If we have already started - try to get use of the disk. 1666 * Try to replace OFFLINE disks first, then FAILED. 1667 */ 1668 if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >= 1669 GET16(&pd->pd_meta, hdr->Max_Partitions)) { 1670 G_RAID_DEBUG1(1, sc, "No free partitions on disk %s", 1671 g_raid_get_diskname(disk)); 1672 goto nofit; 1673 } 1674 ddf_meta_unused_range(&pd->pd_meta, &eoff, &esize); 1675 if (esize == 0) { 1676 G_RAID_DEBUG1(1, sc, "No free space on disk %s", 1677 g_raid_get_diskname(disk)); 1678 goto nofit; 1679 } 1680 eoff *= pd->pd_meta.sectorsize; 1681 esize *= pd->pd_meta.sectorsize; 1682 size = INT64_MAX; 1683 for (i = 0; i < vol->v_disks_count; i++) { 1684 sd = &vol->v_subdisks[i]; 1685 if (sd->sd_state != G_RAID_SUBDISK_S_NONE) 1686 size = sd->sd_size; 1687 if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && 1688 (disk_pos < 0 || 1689 vol->v_subdisks[i].sd_state < sd->sd_state)) 1690 disk_pos = i; 1691 } 1692 if (disk_pos >= 0 && 1693 vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 1694 esize < size) { 1695 G_RAID_DEBUG1(1, sc, "Disk %s free space " 1696 "is too small (%ju < %ju)", 1697 g_raid_get_diskname(disk), esize, size); 1698 disk_pos = -1; 1699 } 1700 if (disk_pos >= 0) { 1701 if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) 1702 esize = size; 1703 md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX 1704 md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX 1705 } else { 1706 nofit: 1707 if (disk->d_state == G_RAID_DISK_S_NONE) 1708 g_raid_change_disk_state(disk, 1709 G_RAID_DISK_S_STALE); 1710 return (0); 1711 } 1712 1713 /* 1714 * If spare is committable, delete spare record. 1715 * Othersize, mark it active and leave there. 1716 */ 1717 sa = ddf_meta_find_sa(&pd->pd_meta, 0); 1718 if (sa != NULL) { 1719 if ((GET8D(&pd->pd_meta, sa->Spare_Type) & 1720 DDF_SAR_TYPE_REVERTIBLE) == 0) { 1721 SET32D(&pd->pd_meta, sa->Signature, 0xffffffff); 1722 } else { 1723 SET8D(&pd->pd_meta, sa->Spare_Type, 1724 GET8D(&pd->pd_meta, sa->Spare_Type) | 1725 DDF_SAR_TYPE_ACTIVE); 1726 } 1727 } 1728 1729 G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", 1730 g_raid_get_diskname(disk), disk_pos, vol->v_name); 1731 resurrection = 1; 1732 } 1733 1734 sd = &vol->v_subdisks[disk_pos]; 1735 1736 if (resurrection && sd->sd_disk != NULL) { 1737 g_raid_change_disk_state(sd->sd_disk, 1738 G_RAID_DISK_S_STALE_FAILED); 1739 TAILQ_REMOVE(&sd->sd_disk->d_subdisks, 1740 sd, sd_next); 1741 } 1742 vol->v_subdisks[disk_pos].sd_disk = disk; 1743 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1744 1745 /* Welcome the new disk. */ 1746 if (resurrection) 1747 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 1748 else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) 1749 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 1750 else 1751 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 1752 1753 if (resurrection) { 1754 sd->sd_offset = eoff; 1755 sd->sd_size = esize; 1756 } else if (pdmeta->cr != NULL && 1757 (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) { 1758 val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]); 1759 sd->sd_offset = (off_t)GET64P(pdmeta, val2 + md_disk_pos) * 512; 1760 sd->sd_size = (off_t)GET64D(pdmeta, vdc1->Block_Count) * 512; 1761 } 1762 1763 if (resurrection) { 1764 /* Stale disk, almost same as new. */ 1765 g_raid_change_subdisk_state(sd, 1766 G_RAID_SUBDISK_S_NEW); 1767 } else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) { 1768 /* Failed disk. */ 1769 g_raid_change_subdisk_state(sd, 1770 G_RAID_SUBDISK_S_FAILED); 1771 } else if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & 1772 (DDF_PDE_FAILED | DDF_PDE_REBUILD)) != 0) { 1773 /* Rebuilding disk. */ 1774 g_raid_change_subdisk_state(sd, 1775 G_RAID_SUBDISK_S_REBUILD); 1776 sd->sd_rebuild_pos = 0; 1777 } else if ((GET8(vmeta, vde->VD_State) & DDF_VDE_DIRTY) != 0 || 1778 (GET8(vmeta, vde->Init_State) & DDF_VDE_INIT_MASK) != 1779 DDF_VDE_INIT_FULL) { 1780 /* Stale disk or dirty volume (unclean shutdown). */ 1781 g_raid_change_subdisk_state(sd, 1782 G_RAID_SUBDISK_S_STALE); 1783 } else { 1784 /* Up to date disk. */ 1785 g_raid_change_subdisk_state(sd, 1786 G_RAID_SUBDISK_S_ACTIVE); 1787 } 1788 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1789 G_RAID_EVENT_SUBDISK); 1790 1791 return (resurrection); 1792 } 1793 1794 static void 1795 g_raid_md_ddf_refill(struct g_raid_softc *sc) 1796 { 1797 struct g_raid_volume *vol; 1798 struct g_raid_subdisk *sd; 1799 struct g_raid_disk *disk; 1800 struct g_raid_md_object *md; 1801 struct g_raid_md_ddf_perdisk *pd; 1802 struct g_raid_md_ddf_pervolume *pv; 1803 int update, updated, i, bad; 1804 1805 md = sc->sc_md; 1806 restart: 1807 updated = 0; 1808 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1809 pv = vol->v_md_data; 1810 if (!pv->pv_started || vol->v_stopping) 1811 continue; 1812 1813 /* Search for subdisk that needs replacement. */ 1814 bad = 0; 1815 for (i = 0; i < vol->v_disks_count; i++) { 1816 sd = &vol->v_subdisks[i]; 1817 if (sd->sd_state == G_RAID_SUBDISK_S_NONE || 1818 sd->sd_state == G_RAID_SUBDISK_S_FAILED) 1819 bad = 1; 1820 } 1821 if (!bad) 1822 continue; 1823 1824 G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " 1825 "trying to refill.", vol->v_name); 1826 1827 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1828 /* Skip failed. */ 1829 if (disk->d_state < G_RAID_DISK_S_SPARE) 1830 continue; 1831 /* Skip already used by this volume. */ 1832 for (i = 0; i < vol->v_disks_count; i++) { 1833 sd = &vol->v_subdisks[i]; 1834 if (sd->sd_disk == disk) 1835 break; 1836 } 1837 if (i < vol->v_disks_count) 1838 continue; 1839 1840 /* Try to use disk if it has empty extents. */ 1841 pd = disk->d_md_data; 1842 if (ddf_meta_count_vdc(&pd->pd_meta, NULL) < 1843 GET16(&pd->pd_meta, hdr->Max_Partitions)) { 1844 update = g_raid_md_ddf_start_disk(disk, vol); 1845 } else 1846 update = 0; 1847 if (update) { 1848 updated = 1; 1849 g_raid_md_write_ddf(md, vol, NULL, disk); 1850 break; 1851 } 1852 } 1853 } 1854 if (updated) 1855 goto restart; 1856 } 1857 1858 static void 1859 g_raid_md_ddf_start(struct g_raid_volume *vol) 1860 { 1861 struct g_raid_softc *sc; 1862 struct g_raid_subdisk *sd; 1863 struct g_raid_disk *disk; 1864 struct g_raid_md_object *md; 1865 struct g_raid_md_ddf_perdisk *pd; 1866 struct g_raid_md_ddf_pervolume *pv; 1867 struct g_raid_md_ddf_object *mdi; 1868 struct ddf_vol_meta *vmeta; 1869 struct ddf_vdc_record *vdc; 1870 uint64_t *val2; 1871 int i, j, bvd; 1872 1873 sc = vol->v_softc; 1874 md = sc->sc_md; 1875 mdi = (struct g_raid_md_ddf_object *)md; 1876 pv = vol->v_md_data; 1877 vmeta = &pv->pv_meta; 1878 vdc = vmeta->vdc; 1879 1880 vol->v_raid_level = GET8(vmeta, vdc->Primary_RAID_Level); 1881 vol->v_raid_level_qualifier = GET8(vmeta, vdc->RLQ); 1882 if (GET8(vmeta, vdc->Secondary_Element_Count) > 1 && 1883 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 && 1884 GET8(vmeta, vdc->Secondary_RAID_Level) == 0) 1885 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 1886 vol->v_sectorsize = GET16(vmeta, vdc->Block_Size); 1887 if (vol->v_sectorsize == 0xffff) 1888 vol->v_sectorsize = vmeta->sectorsize; 1889 vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size); 1890 vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) * 1891 GET8(vmeta, vdc->Secondary_Element_Count); 1892 vol->v_mdf_pdisks = GET8(vmeta, vdc->MDF_Parity_Disks); 1893 vol->v_mdf_polynomial = GET16(vmeta, vdc->MDF_Parity_Generator_Polynomial); 1894 vol->v_mdf_method = GET8(vmeta, vdc->MDF_Constant_Generation_Method); 1895 if (GET8(vmeta, vdc->Rotate_Parity_count) > 31) 1896 vol->v_rotate_parity = 1; 1897 else 1898 vol->v_rotate_parity = 1 << GET8(vmeta, vdc->Rotate_Parity_count); 1899 vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize; 1900 for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) { 1901 if (j == GET16(vmeta, vdc->Primary_Element_Count)) { 1902 j = 0; 1903 bvd++; 1904 } 1905 sd = &vol->v_subdisks[i]; 1906 if (vmeta->bvdc[bvd] == NULL) { 1907 sd->sd_offset = 0; 1908 sd->sd_size = GET64(vmeta, vdc->Block_Count) * 1909 vol->v_sectorsize; 1910 continue; 1911 } 1912 val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[ 1913 GET16(vmeta, hdr->Max_Primary_Element_Entries)]); 1914 sd->sd_offset = GET64P(vmeta, val2 + j) * vol->v_sectorsize; 1915 sd->sd_size = GET64(vmeta, bvdc[bvd]->Block_Count) * 1916 vol->v_sectorsize; 1917 } 1918 g_raid_start_volume(vol); 1919 1920 /* Make all disks found till the moment take their places. */ 1921 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1922 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 1923 if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL) 1924 g_raid_md_ddf_start_disk(disk, vol); 1925 } 1926 1927 pv->pv_started = 1; 1928 mdi->mdio_starting--; 1929 callout_stop(&pv->pv_start_co); 1930 G_RAID_DEBUG1(0, sc, "Volume started."); 1931 g_raid_md_write_ddf(md, vol, NULL, NULL); 1932 1933 /* Pickup any STALE/SPARE disks to refill array if needed. */ 1934 g_raid_md_ddf_refill(sc); 1935 1936 g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); 1937 } 1938 1939 static void 1940 g_raid_ddf_go(void *arg) 1941 { 1942 struct g_raid_volume *vol; 1943 struct g_raid_softc *sc; 1944 struct g_raid_md_ddf_pervolume *pv; 1945 1946 vol = arg; 1947 pv = vol->v_md_data; 1948 sc = vol->v_softc; 1949 if (!pv->pv_started) { 1950 G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); 1951 g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, 1952 G_RAID_EVENT_VOLUME); 1953 } 1954 } 1955 1956 static void 1957 g_raid_md_ddf_new_disk(struct g_raid_disk *disk) 1958 { 1959 struct g_raid_softc *sc; 1960 struct g_raid_md_object *md; 1961 struct g_raid_md_ddf_perdisk *pd; 1962 struct g_raid_md_ddf_pervolume *pv; 1963 struct g_raid_md_ddf_object *mdi; 1964 struct g_raid_volume *vol; 1965 struct ddf_meta *pdmeta; 1966 struct ddf_vol_meta *vmeta; 1967 struct ddf_vdc_record *vdc; 1968 struct ddf_vd_entry *vde; 1969 int i, j, k, num, have, need, cnt, spare; 1970 uint32_t val; 1971 char buf[17]; 1972 1973 sc = disk->d_softc; 1974 md = sc->sc_md; 1975 mdi = (struct g_raid_md_ddf_object *)md; 1976 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 1977 pdmeta = &pd->pd_meta; 1978 spare = -1; 1979 1980 if (mdi->mdio_meta.hdr == NULL) 1981 ddf_meta_copy(&mdi->mdio_meta, pdmeta); 1982 else 1983 ddf_meta_update(&mdi->mdio_meta, pdmeta); 1984 1985 num = GETCRNUM(pdmeta); 1986 for (j = 0; j < num; j++) { 1987 vdc = GETVDCPTR(pdmeta, j); 1988 val = GET32D(pdmeta, vdc->Signature); 1989 1990 if (val == DDF_SA_SIGNATURE && spare == -1) 1991 spare = 1; 1992 1993 if (val != DDF_VDCR_SIGNATURE) 1994 continue; 1995 spare = 0; 1996 k = ddf_meta_find_vd(pdmeta, vdc->VD_GUID); 1997 if (k < 0) 1998 continue; 1999 vde = &pdmeta->vdr->entry[k]; 2000 2001 /* Look for volume with matching ID. */ 2002 vol = g_raid_md_ddf_get_volume(sc, vdc->VD_GUID); 2003 if (vol == NULL) { 2004 ddf_meta_get_name(pdmeta, k, buf); 2005 vol = g_raid_create_volume(sc, buf, 2006 GET16D(pdmeta, vde->VD_Number)); 2007 pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO); 2008 vol->v_md_data = pv; 2009 callout_init(&pv->pv_start_co, 1); 2010 callout_reset(&pv->pv_start_co, 2011 g_raid_start_timeout * hz, 2012 g_raid_ddf_go, vol); 2013 mdi->mdio_starting++; 2014 } else 2015 pv = vol->v_md_data; 2016 2017 /* If we haven't started yet - check metadata freshness. */ 2018 vmeta = &pv->pv_meta; 2019 ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started); 2020 } 2021 2022 if (spare == 1) { 2023 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 2024 g_raid_md_ddf_refill(sc); 2025 } 2026 2027 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2028 pv = vol->v_md_data; 2029 vmeta = &pv->pv_meta; 2030 2031 if (ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID) == NULL) 2032 continue; 2033 2034 if (pv->pv_started) { 2035 if (g_raid_md_ddf_start_disk(disk, vol)) 2036 g_raid_md_write_ddf(md, vol, NULL, NULL); 2037 continue; 2038 } 2039 2040 /* If we collected all needed disks - start array. */ 2041 need = 0; 2042 have = 0; 2043 for (k = 0; k < GET8(vmeta, vdc->Secondary_Element_Count); k++) { 2044 if (vmeta->bvdc[k] == NULL) { 2045 need += GET16(vmeta, vdc->Primary_Element_Count); 2046 continue; 2047 } 2048 cnt = GET16(vmeta, bvdc[k]->Primary_Element_Count); 2049 need += cnt; 2050 for (i = 0; i < cnt; i++) { 2051 val = GET32(vmeta, bvdc[k]->Physical_Disk_Sequence[i]); 2052 if (g_raid_md_ddf_get_disk(sc, NULL, val) != NULL) 2053 have++; 2054 } 2055 } 2056 G_RAID_DEBUG1(1, sc, "Volume %s now has %d of %d disks", 2057 vol->v_name, have, need); 2058 if (have == need) 2059 g_raid_md_ddf_start(vol); 2060 } 2061 } 2062 2063 static int 2064 g_raid_md_create_req_ddf(struct g_raid_md_object *md, struct g_class *mp, 2065 struct gctl_req *req, struct g_geom **gp) 2066 { 2067 struct g_geom *geom; 2068 struct g_raid_softc *sc; 2069 struct g_raid_md_ddf_object *mdi, *mdi1; 2070 char name[16]; 2071 const char *fmtopt; 2072 int be = 1; 2073 2074 mdi = (struct g_raid_md_ddf_object *)md; 2075 fmtopt = gctl_get_asciiparam(req, "fmtopt"); 2076 if (fmtopt == NULL || strcasecmp(fmtopt, "BE") == 0) 2077 be = 1; 2078 else if (strcasecmp(fmtopt, "LE") == 0) 2079 be = 0; 2080 else { 2081 gctl_error(req, "Incorrect fmtopt argument."); 2082 return (G_RAID_MD_TASTE_FAIL); 2083 } 2084 2085 /* Search for existing node. */ 2086 LIST_FOREACH(geom, &mp->geom, geom) { 2087 sc = geom->softc; 2088 if (sc == NULL) 2089 continue; 2090 if (sc->sc_stopping != 0) 2091 continue; 2092 if (sc->sc_md->mdo_class != md->mdo_class) 2093 continue; 2094 mdi1 = (struct g_raid_md_ddf_object *)sc->sc_md; 2095 if (mdi1->mdio_bigendian != be) 2096 continue; 2097 break; 2098 } 2099 if (geom != NULL) { 2100 *gp = geom; 2101 return (G_RAID_MD_TASTE_EXISTING); 2102 } 2103 2104 /* Create new one if not found. */ 2105 mdi->mdio_bigendian = be; 2106 snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE"); 2107 sc = g_raid_create_node(mp, name, md); 2108 if (sc == NULL) 2109 return (G_RAID_MD_TASTE_FAIL); 2110 md->mdo_softc = sc; 2111 *gp = sc->sc_geom; 2112 return (G_RAID_MD_TASTE_NEW); 2113 } 2114 2115 static int 2116 g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp, 2117 struct g_consumer *cp, struct g_geom **gp) 2118 { 2119 struct g_consumer *rcp; 2120 struct g_provider *pp; 2121 struct g_raid_softc *sc; 2122 struct g_raid_disk *disk; 2123 struct ddf_meta meta; 2124 struct g_raid_md_ddf_perdisk *pd; 2125 struct g_raid_md_ddf_object *mdi; 2126 struct g_geom *geom; 2127 int error, result, be; 2128 char name[16]; 2129 2130 G_RAID_DEBUG(1, "Tasting DDF on %s", cp->provider->name); 2131 mdi = (struct g_raid_md_ddf_object *)md; 2132 pp = cp->provider; 2133 2134 /* Read metadata from device. */ 2135 g_topology_unlock(); 2136 bzero(&meta, sizeof(meta)); 2137 error = ddf_meta_read(cp, &meta); 2138 g_topology_lock(); 2139 if (error != 0) 2140 return (G_RAID_MD_TASTE_FAIL); 2141 be = meta.bigendian; 2142 2143 /* Metadata valid. Print it. */ 2144 g_raid_md_ddf_print(&meta); 2145 2146 /* Search for matching node. */ 2147 sc = NULL; 2148 LIST_FOREACH(geom, &mp->geom, geom) { 2149 sc = geom->softc; 2150 if (sc == NULL) 2151 continue; 2152 if (sc->sc_stopping != 0) 2153 continue; 2154 if (sc->sc_md->mdo_class != md->mdo_class) 2155 continue; 2156 mdi = (struct g_raid_md_ddf_object *)sc->sc_md; 2157 if (mdi->mdio_bigendian != be) 2158 continue; 2159 break; 2160 } 2161 2162 /* Found matching node. */ 2163 if (geom != NULL) { 2164 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 2165 result = G_RAID_MD_TASTE_EXISTING; 2166 2167 } else { /* Not found matching node -- create one. */ 2168 result = G_RAID_MD_TASTE_NEW; 2169 mdi->mdio_bigendian = be; 2170 snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE"); 2171 sc = g_raid_create_node(mp, name, md); 2172 md->mdo_softc = sc; 2173 geom = sc->sc_geom; 2174 } 2175 2176 /* There is no return after this point, so we close passed consumer. */ 2177 g_access(cp, -1, 0, 0); 2178 2179 rcp = g_new_consumer(geom); 2180 rcp->flags |= G_CF_DIRECT_RECEIVE; 2181 g_attach(rcp, pp); 2182 if (g_access(rcp, 1, 1, 1) != 0) 2183 ; //goto fail1; 2184 2185 g_topology_unlock(); 2186 sx_xlock(&sc->sc_lock); 2187 2188 pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); 2189 pd->pd_meta = meta; 2190 disk = g_raid_create_disk(sc); 2191 disk->d_md_data = (void *)pd; 2192 disk->d_consumer = rcp; 2193 rcp->private = disk; 2194 2195 g_raid_get_disk_info(disk); 2196 2197 g_raid_md_ddf_new_disk(disk); 2198 2199 sx_xunlock(&sc->sc_lock); 2200 g_topology_lock(); 2201 *gp = geom; 2202 return (result); 2203 } 2204 2205 static int 2206 g_raid_md_event_ddf(struct g_raid_md_object *md, 2207 struct g_raid_disk *disk, u_int event) 2208 { 2209 struct g_raid_softc *sc; 2210 2211 sc = md->mdo_softc; 2212 if (disk == NULL) 2213 return (-1); 2214 switch (event) { 2215 case G_RAID_DISK_E_DISCONNECTED: 2216 /* Delete disk. */ 2217 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 2218 g_raid_destroy_disk(disk); 2219 g_raid_md_ddf_purge_volumes(sc); 2220 2221 /* Write updated metadata to all disks. */ 2222 g_raid_md_write_ddf(md, NULL, NULL, NULL); 2223 2224 /* Check if anything left. */ 2225 if (g_raid_ndisks(sc, -1) == 0) 2226 g_raid_destroy_node(sc, 0); 2227 else 2228 g_raid_md_ddf_refill(sc); 2229 return (0); 2230 } 2231 return (-2); 2232 } 2233 2234 static int 2235 g_raid_md_volume_event_ddf(struct g_raid_md_object *md, 2236 struct g_raid_volume *vol, u_int event) 2237 { 2238 struct g_raid_md_ddf_pervolume *pv; 2239 2240 pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; 2241 switch (event) { 2242 case G_RAID_VOLUME_E_STARTMD: 2243 if (!pv->pv_started) 2244 g_raid_md_ddf_start(vol); 2245 return (0); 2246 } 2247 return (-2); 2248 } 2249 2250 static int 2251 g_raid_md_ctl_ddf(struct g_raid_md_object *md, 2252 struct gctl_req *req) 2253 { 2254 struct g_raid_softc *sc; 2255 struct g_raid_volume *vol, *vol1; 2256 struct g_raid_subdisk *sd; 2257 struct g_raid_disk *disk, *disks[DDF_MAX_DISKS_HARD]; 2258 struct g_raid_md_ddf_perdisk *pd; 2259 struct g_raid_md_ddf_pervolume *pv; 2260 struct g_raid_md_ddf_object *mdi; 2261 struct ddf_sa_record *sa; 2262 struct g_consumer *cp; 2263 struct g_provider *pp; 2264 char arg[16]; 2265 const char *nodename, *verb, *volname, *levelname, *diskname; 2266 char *tmp; 2267 int *nargs, *force; 2268 off_t size, sectorsize, strip, offs[DDF_MAX_DISKS_HARD], esize; 2269 intmax_t *sizearg, *striparg; 2270 int i, numdisks, len, level, qual; 2271 int error; 2272 2273 sc = md->mdo_softc; 2274 mdi = (struct g_raid_md_ddf_object *)md; 2275 verb = gctl_get_param(req, "verb", NULL); 2276 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 2277 error = 0; 2278 2279 if (strcmp(verb, "label") == 0) { 2280 2281 if (*nargs < 4) { 2282 gctl_error(req, "Invalid number of arguments."); 2283 return (-1); 2284 } 2285 volname = gctl_get_asciiparam(req, "arg1"); 2286 if (volname == NULL) { 2287 gctl_error(req, "No volume name."); 2288 return (-2); 2289 } 2290 levelname = gctl_get_asciiparam(req, "arg2"); 2291 if (levelname == NULL) { 2292 gctl_error(req, "No RAID level."); 2293 return (-3); 2294 } 2295 if (g_raid_volume_str2level(levelname, &level, &qual)) { 2296 gctl_error(req, "Unknown RAID level '%s'.", levelname); 2297 return (-4); 2298 } 2299 numdisks = *nargs - 3; 2300 force = gctl_get_paraml(req, "force", sizeof(*force)); 2301 if (!g_raid_md_ddf_supported(level, qual, numdisks, 2302 force ? *force : 0)) { 2303 gctl_error(req, "Unsupported RAID level " 2304 "(0x%02x/0x%02x), or number of disks (%d).", 2305 level, qual, numdisks); 2306 return (-5); 2307 } 2308 2309 /* Search for disks, connect them and probe. */ 2310 size = INT64_MAX; 2311 sectorsize = 0; 2312 bzero(disks, sizeof(disks)); 2313 bzero(offs, sizeof(offs)); 2314 for (i = 0; i < numdisks; i++) { 2315 snprintf(arg, sizeof(arg), "arg%d", i + 3); 2316 diskname = gctl_get_asciiparam(req, arg); 2317 if (diskname == NULL) { 2318 gctl_error(req, "No disk name (%s).", arg); 2319 error = -6; 2320 break; 2321 } 2322 if (strcmp(diskname, "NONE") == 0) 2323 continue; 2324 2325 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2326 if (disk->d_consumer != NULL && 2327 disk->d_consumer->provider != NULL && 2328 strcmp(disk->d_consumer->provider->name, 2329 diskname) == 0) 2330 break; 2331 } 2332 if (disk != NULL) { 2333 if (disk->d_state != G_RAID_DISK_S_ACTIVE) { 2334 gctl_error(req, "Disk '%s' is in a " 2335 "wrong state (%s).", diskname, 2336 g_raid_disk_state2str(disk->d_state)); 2337 error = -7; 2338 break; 2339 } 2340 pd = disk->d_md_data; 2341 if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >= 2342 GET16(&pd->pd_meta, hdr->Max_Partitions)) { 2343 gctl_error(req, "No free partitions " 2344 "on disk '%s'.", 2345 diskname); 2346 error = -7; 2347 break; 2348 } 2349 pp = disk->d_consumer->provider; 2350 disks[i] = disk; 2351 ddf_meta_unused_range(&pd->pd_meta, 2352 &offs[i], &esize); 2353 offs[i] *= pp->sectorsize; 2354 size = MIN(size, (off_t)esize * pp->sectorsize); 2355 sectorsize = MAX(sectorsize, pp->sectorsize); 2356 continue; 2357 } 2358 2359 g_topology_lock(); 2360 cp = g_raid_open_consumer(sc, diskname); 2361 if (cp == NULL) { 2362 gctl_error(req, "Can't open disk '%s'.", 2363 diskname); 2364 g_topology_unlock(); 2365 error = -8; 2366 break; 2367 } 2368 pp = cp->provider; 2369 pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); 2370 disk = g_raid_create_disk(sc); 2371 disk->d_md_data = (void *)pd; 2372 disk->d_consumer = cp; 2373 disks[i] = disk; 2374 cp->private = disk; 2375 ddf_meta_create(disk, &mdi->mdio_meta); 2376 if (mdi->mdio_meta.hdr == NULL) 2377 ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta); 2378 else 2379 ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta); 2380 g_topology_unlock(); 2381 2382 g_raid_get_disk_info(disk); 2383 2384 /* Reserve some space for metadata. */ 2385 size = MIN(size, GET64(&pd->pd_meta, 2386 pdr->entry[0].Configured_Size) * pp->sectorsize); 2387 sectorsize = MAX(sectorsize, pp->sectorsize); 2388 } 2389 if (error != 0) { 2390 for (i = 0; i < numdisks; i++) { 2391 if (disks[i] != NULL && 2392 disks[i]->d_state == G_RAID_DISK_S_NONE) 2393 g_raid_destroy_disk(disks[i]); 2394 } 2395 return (error); 2396 } 2397 2398 if (sectorsize <= 0) { 2399 gctl_error(req, "Can't get sector size."); 2400 return (-8); 2401 } 2402 2403 /* Handle size argument. */ 2404 len = sizeof(*sizearg); 2405 sizearg = gctl_get_param(req, "size", &len); 2406 if (sizearg != NULL && len == sizeof(*sizearg) && 2407 *sizearg > 0) { 2408 if (*sizearg > size) { 2409 gctl_error(req, "Size too big %lld > %lld.", 2410 (long long)*sizearg, (long long)size); 2411 return (-9); 2412 } 2413 size = *sizearg; 2414 } 2415 2416 /* Handle strip argument. */ 2417 strip = 131072; 2418 len = sizeof(*striparg); 2419 striparg = gctl_get_param(req, "strip", &len); 2420 if (striparg != NULL && len == sizeof(*striparg) && 2421 *striparg > 0) { 2422 if (*striparg < sectorsize) { 2423 gctl_error(req, "Strip size too small."); 2424 return (-10); 2425 } 2426 if (*striparg % sectorsize != 0) { 2427 gctl_error(req, "Incorrect strip size."); 2428 return (-11); 2429 } 2430 strip = *striparg; 2431 } 2432 2433 /* Round size down to strip or sector. */ 2434 if (level == G_RAID_VOLUME_RL_RAID1 || 2435 level == G_RAID_VOLUME_RL_RAID3 || 2436 level == G_RAID_VOLUME_RL_SINGLE || 2437 level == G_RAID_VOLUME_RL_CONCAT) 2438 size -= (size % sectorsize); 2439 else if (level == G_RAID_VOLUME_RL_RAID1E && 2440 (numdisks & 1) != 0) 2441 size -= (size % (2 * strip)); 2442 else 2443 size -= (size % strip); 2444 if (size <= 0) { 2445 gctl_error(req, "Size too small."); 2446 return (-13); 2447 } 2448 2449 /* We have all we need, create things: volume, ... */ 2450 pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO); 2451 ddf_vol_meta_create(&pv->pv_meta, &mdi->mdio_meta); 2452 pv->pv_started = 1; 2453 vol = g_raid_create_volume(sc, volname, -1); 2454 vol->v_md_data = pv; 2455 vol->v_raid_level = level; 2456 vol->v_raid_level_qualifier = qual; 2457 vol->v_strip_size = strip; 2458 vol->v_disks_count = numdisks; 2459 if (level == G_RAID_VOLUME_RL_RAID0 || 2460 level == G_RAID_VOLUME_RL_CONCAT || 2461 level == G_RAID_VOLUME_RL_SINGLE) 2462 vol->v_mediasize = size * numdisks; 2463 else if (level == G_RAID_VOLUME_RL_RAID1) 2464 vol->v_mediasize = size; 2465 else if (level == G_RAID_VOLUME_RL_RAID3 || 2466 level == G_RAID_VOLUME_RL_RAID4 || 2467 level == G_RAID_VOLUME_RL_RAID5) 2468 vol->v_mediasize = size * (numdisks - 1); 2469 else if (level == G_RAID_VOLUME_RL_RAID5R) { 2470 vol->v_mediasize = size * (numdisks - 1); 2471 vol->v_rotate_parity = 1024; 2472 } else if (level == G_RAID_VOLUME_RL_RAID6 || 2473 level == G_RAID_VOLUME_RL_RAID5E || 2474 level == G_RAID_VOLUME_RL_RAID5EE) 2475 vol->v_mediasize = size * (numdisks - 2); 2476 else if (level == G_RAID_VOLUME_RL_RAIDMDF) { 2477 if (numdisks < 5) 2478 vol->v_mdf_pdisks = 2; 2479 else 2480 vol->v_mdf_pdisks = 3; 2481 vol->v_mdf_polynomial = 0x11d; 2482 vol->v_mdf_method = 0x00; 2483 vol->v_mediasize = size * (numdisks - vol->v_mdf_pdisks); 2484 } else { /* RAID1E */ 2485 vol->v_mediasize = ((size * numdisks) / strip / 2) * 2486 strip; 2487 } 2488 vol->v_sectorsize = sectorsize; 2489 g_raid_start_volume(vol); 2490 2491 /* , and subdisks. */ 2492 for (i = 0; i < numdisks; i++) { 2493 disk = disks[i]; 2494 sd = &vol->v_subdisks[i]; 2495 sd->sd_disk = disk; 2496 sd->sd_offset = offs[i]; 2497 sd->sd_size = size; 2498 if (disk == NULL) 2499 continue; 2500 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 2501 g_raid_change_disk_state(disk, 2502 G_RAID_DISK_S_ACTIVE); 2503 g_raid_change_subdisk_state(sd, 2504 G_RAID_SUBDISK_S_ACTIVE); 2505 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 2506 G_RAID_EVENT_SUBDISK); 2507 } 2508 2509 /* Write metadata based on created entities. */ 2510 G_RAID_DEBUG1(0, sc, "Array started."); 2511 g_raid_md_write_ddf(md, vol, NULL, NULL); 2512 2513 /* Pickup any STALE/SPARE disks to refill array if needed. */ 2514 g_raid_md_ddf_refill(sc); 2515 2516 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 2517 G_RAID_EVENT_VOLUME); 2518 return (0); 2519 } 2520 if (strcmp(verb, "add") == 0) { 2521 2522 gctl_error(req, "`add` command is not applicable, " 2523 "use `label` instead."); 2524 return (-99); 2525 } 2526 if (strcmp(verb, "delete") == 0) { 2527 2528 nodename = gctl_get_asciiparam(req, "arg0"); 2529 if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) 2530 nodename = NULL; 2531 2532 /* Full node destruction. */ 2533 if (*nargs == 1 && nodename != NULL) { 2534 /* Check if some volume is still open. */ 2535 force = gctl_get_paraml(req, "force", sizeof(*force)); 2536 if (force != NULL && *force == 0 && 2537 g_raid_nopens(sc) != 0) { 2538 gctl_error(req, "Some volume is still open."); 2539 return (-4); 2540 } 2541 2542 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2543 if (disk->d_consumer) 2544 ddf_meta_erase(disk->d_consumer); 2545 } 2546 g_raid_destroy_node(sc, 0); 2547 return (0); 2548 } 2549 2550 /* Destroy specified volume. If it was last - all node. */ 2551 if (*nargs > 2) { 2552 gctl_error(req, "Invalid number of arguments."); 2553 return (-1); 2554 } 2555 volname = gctl_get_asciiparam(req, 2556 nodename != NULL ? "arg1" : "arg0"); 2557 if (volname == NULL) { 2558 gctl_error(req, "No volume name."); 2559 return (-2); 2560 } 2561 2562 /* Search for volume. */ 2563 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2564 if (strcmp(vol->v_name, volname) == 0) 2565 break; 2566 pp = vol->v_provider; 2567 if (pp == NULL) 2568 continue; 2569 if (strcmp(pp->name, volname) == 0) 2570 break; 2571 if (strncmp(pp->name, "raid/", 5) == 0 && 2572 strcmp(pp->name + 5, volname) == 0) 2573 break; 2574 } 2575 if (vol == NULL) { 2576 i = strtol(volname, &tmp, 10); 2577 if (verb != volname && tmp[0] == 0) { 2578 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2579 if (vol->v_global_id == i) 2580 break; 2581 } 2582 } 2583 } 2584 if (vol == NULL) { 2585 gctl_error(req, "Volume '%s' not found.", volname); 2586 return (-3); 2587 } 2588 2589 /* Check if volume is still open. */ 2590 force = gctl_get_paraml(req, "force", sizeof(*force)); 2591 if (force != NULL && *force == 0 && 2592 vol->v_provider_open != 0) { 2593 gctl_error(req, "Volume is still open."); 2594 return (-4); 2595 } 2596 2597 /* Destroy volume and potentially node. */ 2598 i = 0; 2599 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) 2600 i++; 2601 if (i >= 2) { 2602 g_raid_destroy_volume(vol); 2603 g_raid_md_ddf_purge_disks(sc); 2604 g_raid_md_write_ddf(md, NULL, NULL, NULL); 2605 } else { 2606 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2607 if (disk->d_consumer) 2608 ddf_meta_erase(disk->d_consumer); 2609 } 2610 g_raid_destroy_node(sc, 0); 2611 } 2612 return (0); 2613 } 2614 if (strcmp(verb, "remove") == 0 || 2615 strcmp(verb, "fail") == 0) { 2616 if (*nargs < 2) { 2617 gctl_error(req, "Invalid number of arguments."); 2618 return (-1); 2619 } 2620 for (i = 1; i < *nargs; i++) { 2621 snprintf(arg, sizeof(arg), "arg%d", i); 2622 diskname = gctl_get_asciiparam(req, arg); 2623 if (diskname == NULL) { 2624 gctl_error(req, "No disk name (%s).", arg); 2625 error = -2; 2626 break; 2627 } 2628 if (strncmp(diskname, "/dev/", 5) == 0) 2629 diskname += 5; 2630 2631 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2632 if (disk->d_consumer != NULL && 2633 disk->d_consumer->provider != NULL && 2634 strcmp(disk->d_consumer->provider->name, 2635 diskname) == 0) 2636 break; 2637 } 2638 if (disk == NULL) { 2639 gctl_error(req, "Disk '%s' not found.", 2640 diskname); 2641 error = -3; 2642 break; 2643 } 2644 2645 if (strcmp(verb, "fail") == 0) { 2646 g_raid_md_fail_disk_ddf(md, NULL, disk); 2647 continue; 2648 } 2649 2650 /* Erase metadata on deleting disk and destroy it. */ 2651 ddf_meta_erase(disk->d_consumer); 2652 g_raid_destroy_disk(disk); 2653 } 2654 g_raid_md_ddf_purge_volumes(sc); 2655 2656 /* Write updated metadata to remaining disks. */ 2657 g_raid_md_write_ddf(md, NULL, NULL, NULL); 2658 2659 /* Check if anything left. */ 2660 if (g_raid_ndisks(sc, -1) == 0) 2661 g_raid_destroy_node(sc, 0); 2662 else 2663 g_raid_md_ddf_refill(sc); 2664 return (error); 2665 } 2666 if (strcmp(verb, "insert") == 0) { 2667 if (*nargs < 2) { 2668 gctl_error(req, "Invalid number of arguments."); 2669 return (-1); 2670 } 2671 for (i = 1; i < *nargs; i++) { 2672 /* Get disk name. */ 2673 snprintf(arg, sizeof(arg), "arg%d", i); 2674 diskname = gctl_get_asciiparam(req, arg); 2675 if (diskname == NULL) { 2676 gctl_error(req, "No disk name (%s).", arg); 2677 error = -3; 2678 break; 2679 } 2680 2681 /* Try to find provider with specified name. */ 2682 g_topology_lock(); 2683 cp = g_raid_open_consumer(sc, diskname); 2684 if (cp == NULL) { 2685 gctl_error(req, "Can't open disk '%s'.", 2686 diskname); 2687 g_topology_unlock(); 2688 error = -4; 2689 break; 2690 } 2691 pp = cp->provider; 2692 g_topology_unlock(); 2693 2694 pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); 2695 2696 disk = g_raid_create_disk(sc); 2697 disk->d_consumer = cp; 2698 disk->d_md_data = (void *)pd; 2699 cp->private = disk; 2700 2701 g_raid_get_disk_info(disk); 2702 2703 /* Welcome the "new" disk. */ 2704 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 2705 ddf_meta_create(disk, &mdi->mdio_meta); 2706 sa = ddf_meta_find_sa(&pd->pd_meta, 1); 2707 if (sa != NULL) { 2708 SET32D(&pd->pd_meta, sa->Signature, 2709 DDF_SA_SIGNATURE); 2710 SET8D(&pd->pd_meta, sa->Spare_Type, 0); 2711 SET16D(&pd->pd_meta, sa->Populated_SAEs, 0); 2712 SET16D(&pd->pd_meta, sa->MAX_SAE_Supported, 2713 (GET16(&pd->pd_meta, hdr->Configuration_Record_Length) * 2714 pd->pd_meta.sectorsize - 2715 sizeof(struct ddf_sa_record)) / 2716 sizeof(struct ddf_sa_entry)); 2717 } 2718 if (mdi->mdio_meta.hdr == NULL) 2719 ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta); 2720 else 2721 ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta); 2722 g_raid_md_write_ddf(md, NULL, NULL, NULL); 2723 g_raid_md_ddf_refill(sc); 2724 } 2725 return (error); 2726 } 2727 return (-100); 2728 } 2729 2730 static int 2731 g_raid_md_write_ddf(struct g_raid_md_object *md, struct g_raid_volume *tvol, 2732 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 2733 { 2734 struct g_raid_softc *sc; 2735 struct g_raid_volume *vol; 2736 struct g_raid_subdisk *sd; 2737 struct g_raid_disk *disk; 2738 struct g_raid_md_ddf_perdisk *pd; 2739 struct g_raid_md_ddf_pervolume *pv; 2740 struct g_raid_md_ddf_object *mdi; 2741 struct ddf_meta *gmeta; 2742 struct ddf_vol_meta *vmeta; 2743 struct ddf_vdc_record *vdc; 2744 struct ddf_sa_record *sa; 2745 uint64_t *val2; 2746 int i, j, pos, bvd, size; 2747 2748 sc = md->mdo_softc; 2749 mdi = (struct g_raid_md_ddf_object *)md; 2750 gmeta = &mdi->mdio_meta; 2751 2752 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 2753 return (0); 2754 2755 /* 2756 * Clear disk flags to let only really needed ones to be reset. 2757 * Do it only if there are no volumes in starting state now, 2758 * as they can update disk statuses yet and we may kill innocent. 2759 */ 2760 if (mdi->mdio_starting == 0) { 2761 for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) { 2762 if (isff(gmeta->pdr->entry[i].PD_GUID, 24)) 2763 continue; 2764 SET16(gmeta, pdr->entry[i].PD_Type, 2765 GET16(gmeta, pdr->entry[i].PD_Type) & 2766 ~(DDF_PDE_PARTICIPATING | 2767 DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)); 2768 if ((GET16(gmeta, pdr->entry[i].PD_State) & 2769 DDF_PDE_PFA) == 0) 2770 SET16(gmeta, pdr->entry[i].PD_State, 0); 2771 } 2772 } 2773 2774 /* Generate/update new per-volume metadata. */ 2775 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2776 pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; 2777 if (vol->v_stopping || !pv->pv_started) 2778 continue; 2779 vmeta = &pv->pv_meta; 2780 2781 SET32(vmeta, vdc->Sequence_Number, 2782 GET32(vmeta, vdc->Sequence_Number) + 1); 2783 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E && 2784 vol->v_disks_count % 2 == 0) 2785 SET16(vmeta, vdc->Primary_Element_Count, 2); 2786 else 2787 SET16(vmeta, vdc->Primary_Element_Count, 2788 vol->v_disks_count); 2789 SET8(vmeta, vdc->Stripe_Size, 2790 ffs(vol->v_strip_size / vol->v_sectorsize) - 1); 2791 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E && 2792 vol->v_disks_count % 2 == 0) { 2793 SET8(vmeta, vdc->Primary_RAID_Level, 2794 DDF_VDCR_RAID1); 2795 SET8(vmeta, vdc->RLQ, 0); 2796 SET8(vmeta, vdc->Secondary_Element_Count, 2797 vol->v_disks_count / 2); 2798 SET8(vmeta, vdc->Secondary_RAID_Level, 0); 2799 } else { 2800 SET8(vmeta, vdc->Primary_RAID_Level, 2801 vol->v_raid_level); 2802 SET8(vmeta, vdc->RLQ, 2803 vol->v_raid_level_qualifier); 2804 SET8(vmeta, vdc->Secondary_Element_Count, 1); 2805 SET8(vmeta, vdc->Secondary_RAID_Level, 0); 2806 } 2807 SET8(vmeta, vdc->Secondary_Element_Seq, 0); 2808 SET64(vmeta, vdc->Block_Count, 0); 2809 SET64(vmeta, vdc->VD_Size, vol->v_mediasize / vol->v_sectorsize); 2810 SET16(vmeta, vdc->Block_Size, vol->v_sectorsize); 2811 SET8(vmeta, vdc->Rotate_Parity_count, 2812 fls(vol->v_rotate_parity) - 1); 2813 SET8(vmeta, vdc->MDF_Parity_Disks, vol->v_mdf_pdisks); 2814 SET16(vmeta, vdc->MDF_Parity_Generator_Polynomial, 2815 vol->v_mdf_polynomial); 2816 SET8(vmeta, vdc->MDF_Constant_Generation_Method, 2817 vol->v_mdf_method); 2818 2819 SET16(vmeta, vde->VD_Number, vol->v_global_id); 2820 if (vol->v_state <= G_RAID_VOLUME_S_BROKEN) 2821 SET8(vmeta, vde->VD_State, DDF_VDE_FAILED); 2822 else if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED) 2823 SET8(vmeta, vde->VD_State, DDF_VDE_DEGRADED); 2824 else if (vol->v_state <= G_RAID_VOLUME_S_SUBOPTIMAL) 2825 SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL); 2826 else 2827 SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL); 2828 if (vol->v_dirty || 2829 g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 || 2830 g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0) 2831 SET8(vmeta, vde->VD_State, 2832 GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY); 2833 SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX 2834 ddf_meta_put_name(vmeta, vol->v_name); 2835 2836 for (i = 0; i < vol->v_disks_count; i++) { 2837 sd = &vol->v_subdisks[i]; 2838 bvd = i / GET16(vmeta, vdc->Primary_Element_Count); 2839 pos = i % GET16(vmeta, vdc->Primary_Element_Count); 2840 disk = sd->sd_disk; 2841 if (disk != NULL) { 2842 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 2843 if (vmeta->bvdc[bvd] == NULL) { 2844 size = GET16(vmeta, 2845 hdr->Configuration_Record_Length) * 2846 vmeta->sectorsize; 2847 vmeta->bvdc[bvd] = malloc(size, 2848 M_MD_DDF, M_WAITOK); 2849 memset(vmeta->bvdc[bvd], 0xff, size); 2850 } 2851 memcpy(vmeta->bvdc[bvd], vmeta->vdc, 2852 sizeof(struct ddf_vdc_record)); 2853 SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd); 2854 SET64(vmeta, bvdc[bvd]->Block_Count, 2855 sd->sd_size / vol->v_sectorsize); 2856 SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos], 2857 GET32(&pd->pd_meta, pdd->PD_Reference)); 2858 val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[ 2859 GET16(vmeta, hdr->Max_Primary_Element_Entries)]); 2860 SET64P(vmeta, val2 + pos, 2861 sd->sd_offset / vol->v_sectorsize); 2862 } 2863 if (vmeta->bvdc[bvd] == NULL) 2864 continue; 2865 2866 j = ddf_meta_find_pd(gmeta, NULL, 2867 GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos])); 2868 if (j < 0) 2869 continue; 2870 SET16(gmeta, pdr->entry[j].PD_Type, 2871 GET16(gmeta, pdr->entry[j].PD_Type) | 2872 DDF_PDE_PARTICIPATING); 2873 if (sd->sd_state == G_RAID_SUBDISK_S_NONE) 2874 SET16(gmeta, pdr->entry[j].PD_State, 2875 GET16(gmeta, pdr->entry[j].PD_State) | 2876 (DDF_PDE_FAILED | DDF_PDE_MISSING)); 2877 else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) 2878 SET16(gmeta, pdr->entry[j].PD_State, 2879 GET16(gmeta, pdr->entry[j].PD_State) | 2880 (DDF_PDE_FAILED | DDF_PDE_PFA)); 2881 else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) 2882 SET16(gmeta, pdr->entry[j].PD_State, 2883 GET16(gmeta, pdr->entry[j].PD_State) | 2884 DDF_PDE_REBUILD); 2885 else 2886 SET16(gmeta, pdr->entry[j].PD_State, 2887 GET16(gmeta, pdr->entry[j].PD_State) | 2888 DDF_PDE_ONLINE); 2889 } 2890 } 2891 2892 /* Mark spare and failed disks as such. */ 2893 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2894 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 2895 i = ddf_meta_find_pd(gmeta, NULL, 2896 GET32(&pd->pd_meta, pdd->PD_Reference)); 2897 if (i < 0) 2898 continue; 2899 if (disk->d_state == G_RAID_DISK_S_FAILED) { 2900 SET16(gmeta, pdr->entry[i].PD_State, 2901 GET16(gmeta, pdr->entry[i].PD_State) | 2902 (DDF_PDE_FAILED | DDF_PDE_PFA)); 2903 } 2904 if (disk->d_state != G_RAID_DISK_S_SPARE) 2905 continue; 2906 sa = ddf_meta_find_sa(&pd->pd_meta, 0); 2907 if (sa == NULL || 2908 (GET8D(&pd->pd_meta, sa->Spare_Type) & 2909 DDF_SAR_TYPE_DEDICATED) == 0) { 2910 SET16(gmeta, pdr->entry[i].PD_Type, 2911 GET16(gmeta, pdr->entry[i].PD_Type) | 2912 DDF_PDE_GLOBAL_SPARE); 2913 } else { 2914 SET16(gmeta, pdr->entry[i].PD_Type, 2915 GET16(gmeta, pdr->entry[i].PD_Type) | 2916 DDF_PDE_CONFIG_SPARE); 2917 } 2918 SET16(gmeta, pdr->entry[i].PD_State, 2919 GET16(gmeta, pdr->entry[i].PD_State) | 2920 DDF_PDE_ONLINE); 2921 } 2922 2923 /* Remove disks without "participating" flag (unused). */ 2924 for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) { 2925 if (isff(gmeta->pdr->entry[i].PD_GUID, 24)) 2926 continue; 2927 if ((GET16(gmeta, pdr->entry[i].PD_Type) & 2928 (DDF_PDE_PARTICIPATING | 2929 DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)) != 0 || 2930 g_raid_md_ddf_get_disk(sc, 2931 NULL, GET32(gmeta, pdr->entry[i].PD_Reference)) != NULL) 2932 j = i; 2933 else 2934 memset(&gmeta->pdr->entry[i], 0xff, 2935 sizeof(struct ddf_pd_entry)); 2936 } 2937 SET16(gmeta, pdr->Populated_PDEs, j + 1); 2938 2939 /* Update per-disk metadata and write them. */ 2940 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2941 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 2942 if (disk->d_state != G_RAID_DISK_S_ACTIVE && 2943 disk->d_state != G_RAID_DISK_S_SPARE) 2944 continue; 2945 /* Update PDR. */ 2946 memcpy(pd->pd_meta.pdr, gmeta->pdr, 2947 GET32(&pd->pd_meta, hdr->pdr_length) * 2948 pd->pd_meta.sectorsize); 2949 /* Update VDR. */ 2950 SET16(&pd->pd_meta, vdr->Populated_VDEs, 0); 2951 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2952 if (vol->v_stopping) 2953 continue; 2954 pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; 2955 i = ddf_meta_find_vd(&pd->pd_meta, 2956 pv->pv_meta.vde->VD_GUID); 2957 if (i < 0) 2958 i = ddf_meta_find_vd(&pd->pd_meta, NULL); 2959 if (i >= 0) 2960 memcpy(&pd->pd_meta.vdr->entry[i], 2961 pv->pv_meta.vde, 2962 sizeof(struct ddf_vd_entry)); 2963 } 2964 /* Update VDC. */ 2965 if (mdi->mdio_starting == 0) { 2966 /* Remove all VDCs to restore needed later. */ 2967 j = GETCRNUM(&pd->pd_meta); 2968 for (i = 0; i < j; i++) { 2969 vdc = GETVDCPTR(&pd->pd_meta, i); 2970 if (GET32D(&pd->pd_meta, vdc->Signature) != 2971 DDF_VDCR_SIGNATURE) 2972 continue; 2973 SET32D(&pd->pd_meta, vdc->Signature, 0xffffffff); 2974 } 2975 } 2976 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 2977 vol = sd->sd_volume; 2978 if (vol->v_stopping) 2979 continue; 2980 pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; 2981 vmeta = &pv->pv_meta; 2982 vdc = ddf_meta_find_vdc(&pd->pd_meta, 2983 vmeta->vde->VD_GUID); 2984 if (vdc == NULL) 2985 vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL); 2986 if (vdc != NULL) { 2987 bvd = sd->sd_pos / GET16(vmeta, 2988 vdc->Primary_Element_Count); 2989 memcpy(vdc, vmeta->bvdc[bvd], 2990 GET16(&pd->pd_meta, 2991 hdr->Configuration_Record_Length) * 2992 pd->pd_meta.sectorsize); 2993 } 2994 } 2995 G_RAID_DEBUG(1, "Writing DDF metadata to %s", 2996 g_raid_get_diskname(disk)); 2997 g_raid_md_ddf_print(&pd->pd_meta); 2998 ddf_meta_write(disk->d_consumer, &pd->pd_meta); 2999 } 3000 return (0); 3001 } 3002 3003 static int 3004 g_raid_md_fail_disk_ddf(struct g_raid_md_object *md, 3005 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 3006 { 3007 struct g_raid_softc *sc; 3008 struct g_raid_md_ddf_perdisk *pd; 3009 struct g_raid_subdisk *sd; 3010 int i; 3011 3012 sc = md->mdo_softc; 3013 pd = (struct g_raid_md_ddf_perdisk *)tdisk->d_md_data; 3014 3015 /* We can't fail disk that is not a part of array now. */ 3016 if (tdisk->d_state != G_RAID_DISK_S_ACTIVE) 3017 return (-1); 3018 3019 /* 3020 * Mark disk as failed in metadata and try to write that metadata 3021 * to the disk itself to prevent it's later resurrection as STALE. 3022 */ 3023 G_RAID_DEBUG(1, "Writing DDF metadata to %s", 3024 g_raid_get_diskname(tdisk)); 3025 i = ddf_meta_find_pd(&pd->pd_meta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference)); 3026 SET16(&pd->pd_meta, pdr->entry[i].PD_State, DDF_PDE_FAILED | DDF_PDE_PFA); 3027 if (tdisk->d_consumer != NULL) 3028 ddf_meta_write(tdisk->d_consumer, &pd->pd_meta); 3029 3030 /* Change states. */ 3031 g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); 3032 TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { 3033 g_raid_change_subdisk_state(sd, 3034 G_RAID_SUBDISK_S_FAILED); 3035 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, 3036 G_RAID_EVENT_SUBDISK); 3037 } 3038 3039 /* Write updated metadata to remaining disks. */ 3040 g_raid_md_write_ddf(md, NULL, NULL, tdisk); 3041 3042 g_raid_md_ddf_refill(sc); 3043 return (0); 3044 } 3045 3046 static int 3047 g_raid_md_free_disk_ddf(struct g_raid_md_object *md, 3048 struct g_raid_disk *disk) 3049 { 3050 struct g_raid_md_ddf_perdisk *pd; 3051 3052 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 3053 ddf_meta_free(&pd->pd_meta); 3054 free(pd, M_MD_DDF); 3055 disk->d_md_data = NULL; 3056 return (0); 3057 } 3058 3059 static int 3060 g_raid_md_free_volume_ddf(struct g_raid_md_object *md, 3061 struct g_raid_volume *vol) 3062 { 3063 struct g_raid_md_ddf_object *mdi; 3064 struct g_raid_md_ddf_pervolume *pv; 3065 3066 mdi = (struct g_raid_md_ddf_object *)md; 3067 pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; 3068 ddf_vol_meta_free(&pv->pv_meta); 3069 if (!pv->pv_started) { 3070 pv->pv_started = 1; 3071 mdi->mdio_starting--; 3072 callout_stop(&pv->pv_start_co); 3073 } 3074 free(pv, M_MD_DDF); 3075 vol->v_md_data = NULL; 3076 return (0); 3077 } 3078 3079 static int 3080 g_raid_md_free_ddf(struct g_raid_md_object *md) 3081 { 3082 struct g_raid_md_ddf_object *mdi; 3083 3084 mdi = (struct g_raid_md_ddf_object *)md; 3085 if (!mdi->mdio_started) { 3086 mdi->mdio_started = 0; 3087 callout_stop(&mdi->mdio_start_co); 3088 G_RAID_DEBUG1(1, md->mdo_softc, 3089 "root_mount_rel %p", mdi->mdio_rootmount); 3090 root_mount_rel(mdi->mdio_rootmount); 3091 mdi->mdio_rootmount = NULL; 3092 } 3093 ddf_meta_free(&mdi->mdio_meta); 3094 return (0); 3095 } 3096 3097 G_RAID_MD_DECLARE(ddf, "DDF"); 3098