1 /*- 2 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/module.h> 34 #include <sys/limits.h> 35 #include <sys/lock.h> 36 #include <sys/mutex.h> 37 #include <sys/bio.h> 38 #include <sys/sbuf.h> 39 #include <sys/sysctl.h> 40 #include <sys/malloc.h> 41 #include <sys/eventhandler.h> 42 #include <vm/uma.h> 43 #include <geom/geom.h> 44 #include <sys/proc.h> 45 #include <sys/kthread.h> 46 #include <sys/sched.h> 47 #include <geom/raid/g_raid.h> 48 #include "g_raid_md_if.h" 49 #include "g_raid_tr_if.h" 50 51 static MALLOC_DEFINE(M_RAID, "raid_data", "GEOM_RAID Data"); 52 53 SYSCTL_DECL(_kern_geom); 54 SYSCTL_NODE(_kern_geom, OID_AUTO, raid, CTLFLAG_RW, 0, "GEOM_RAID stuff"); 55 int g_raid_enable = 1; 56 TUNABLE_INT("kern.geom.raid.enable", &g_raid_enable); 57 SYSCTL_INT(_kern_geom_raid, OID_AUTO, enable, CTLFLAG_RW, 58 &g_raid_enable, 0, "Enable on-disk metadata taste"); 59 u_int g_raid_aggressive_spare = 0; 60 TUNABLE_INT("kern.geom.raid.aggressive_spare", &g_raid_aggressive_spare); 61 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, aggressive_spare, CTLFLAG_RW, 62 &g_raid_aggressive_spare, 0, "Use disks without metadata as spare"); 63 u_int g_raid_debug = 0; 64 TUNABLE_INT("kern.geom.raid.debug", &g_raid_debug); 65 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RW, &g_raid_debug, 0, 66 "Debug level"); 67 int g_raid_read_err_thresh = 10; 68 TUNABLE_INT("kern.geom.raid.read_err_thresh", &g_raid_read_err_thresh); 69 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, read_err_thresh, CTLFLAG_RW, 70 &g_raid_read_err_thresh, 0, 71 "Number of read errors equated to disk failure"); 72 u_int g_raid_start_timeout = 30; 73 TUNABLE_INT("kern.geom.raid.start_timeout", &g_raid_start_timeout); 74 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, start_timeout, CTLFLAG_RW, 75 &g_raid_start_timeout, 0, 76 "Time to wait for all array components"); 77 static u_int g_raid_clean_time = 5; 78 TUNABLE_INT("kern.geom.raid.clean_time", &g_raid_clean_time); 79 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, clean_time, CTLFLAG_RW, 80 &g_raid_clean_time, 0, "Mark volume as clean 
when idling"); 81 static u_int g_raid_disconnect_on_failure = 1; 82 TUNABLE_INT("kern.geom.raid.disconnect_on_failure", 83 &g_raid_disconnect_on_failure); 84 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, disconnect_on_failure, CTLFLAG_RW, 85 &g_raid_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 86 static u_int g_raid_name_format = 0; 87 TUNABLE_INT("kern.geom.raid.name_format", &g_raid_name_format); 88 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, name_format, CTLFLAG_RW, 89 &g_raid_name_format, 0, "Providers name format."); 90 static u_int g_raid_idle_threshold = 1000000; 91 TUNABLE_INT("kern.geom.raid.idle_threshold", &g_raid_idle_threshold); 92 SYSCTL_UINT(_kern_geom_raid, OID_AUTO, idle_threshold, CTLFLAG_RW, 93 &g_raid_idle_threshold, 1000000, 94 "Time in microseconds to consider a volume idle."); 95 96 #define MSLEEP(rv, ident, mtx, priority, wmesg, timeout) do { \ 97 G_RAID_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 98 rv = msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 99 G_RAID_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 100 } while (0) 101 102 LIST_HEAD(, g_raid_md_class) g_raid_md_classes = 103 LIST_HEAD_INITIALIZER(g_raid_md_classes); 104 105 LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes = 106 LIST_HEAD_INITIALIZER(g_raid_tr_classes); 107 108 LIST_HEAD(, g_raid_volume) g_raid_volumes = 109 LIST_HEAD_INITIALIZER(g_raid_volumes); 110 111 static eventhandler_tag g_raid_pre_sync = NULL; 112 static int g_raid_started = 0; 113 114 static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp, 115 struct g_geom *gp); 116 static g_taste_t g_raid_taste; 117 static void g_raid_init(struct g_class *mp); 118 static void g_raid_fini(struct g_class *mp); 119 120 struct g_class g_raid_class = { 121 .name = G_RAID_CLASS_NAME, 122 .version = G_VERSION, 123 .ctlreq = g_raid_ctl, 124 .taste = g_raid_taste, 125 .destroy_geom = g_raid_destroy_geom, 126 .init = g_raid_init, 127 .fini = g_raid_fini 128 }; 129 130 static void g_raid_destroy_provider(struct g_raid_volume *vol); 131 static int g_raid_update_disk(struct g_raid_disk *disk, u_int event); 132 static int g_raid_update_subdisk(struct g_raid_subdisk *subdisk, u_int event); 133 static int g_raid_update_volume(struct g_raid_volume *vol, u_int event); 134 static int g_raid_update_node(struct g_raid_softc *sc, u_int event); 135 static void g_raid_dumpconf(struct sbuf *sb, const char *indent, 136 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 137 static void g_raid_start(struct bio *bp); 138 static void g_raid_start_request(struct bio *bp); 139 static void g_raid_disk_done(struct bio *bp); 140 static void g_raid_poll(struct g_raid_softc *sc); 141 142 static const char * 143 g_raid_node_event2str(int event) 144 { 145 146 switch (event) { 147 case G_RAID_NODE_E_WAKE: 148 return ("WAKE"); 149 case G_RAID_NODE_E_START: 150 return ("START"); 151 default: 152 return ("INVALID"); 153 } 154 } 155 156 const char * 157 g_raid_disk_state2str(int state) 158 { 159 160 switch (state) { 161 case G_RAID_DISK_S_NONE: 162 return ("NONE"); 163 case G_RAID_DISK_S_OFFLINE: 164 return ("OFFLINE"); 165 case G_RAID_DISK_S_FAILED: 166 return ("FAILED"); 167 case G_RAID_DISK_S_STALE_FAILED: 168 return ("STALE_FAILED"); 169 case G_RAID_DISK_S_SPARE: 170 return ("SPARE"); 171 case G_RAID_DISK_S_STALE: 172 return ("STALE"); 173 case G_RAID_DISK_S_ACTIVE: 174 return ("ACTIVE"); 175 default: 176 return ("INVALID"); 177 } 178 } 179 180 static const char * 181 g_raid_disk_event2str(int event) 182 { 183 184 switch 
(event) { 185 case G_RAID_DISK_E_DISCONNECTED: 186 return ("DISCONNECTED"); 187 default: 188 return ("INVALID"); 189 } 190 } 191 192 const char * 193 g_raid_subdisk_state2str(int state) 194 { 195 196 switch (state) { 197 case G_RAID_SUBDISK_S_NONE: 198 return ("NONE"); 199 case G_RAID_SUBDISK_S_FAILED: 200 return ("FAILED"); 201 case G_RAID_SUBDISK_S_NEW: 202 return ("NEW"); 203 case G_RAID_SUBDISK_S_REBUILD: 204 return ("REBUILD"); 205 case G_RAID_SUBDISK_S_UNINITIALIZED: 206 return ("UNINITIALIZED"); 207 case G_RAID_SUBDISK_S_STALE: 208 return ("STALE"); 209 case G_RAID_SUBDISK_S_RESYNC: 210 return ("RESYNC"); 211 case G_RAID_SUBDISK_S_ACTIVE: 212 return ("ACTIVE"); 213 default: 214 return ("INVALID"); 215 } 216 } 217 218 static const char * 219 g_raid_subdisk_event2str(int event) 220 { 221 222 switch (event) { 223 case G_RAID_SUBDISK_E_NEW: 224 return ("NEW"); 225 case G_RAID_SUBDISK_E_FAILED: 226 return ("FAILED"); 227 case G_RAID_SUBDISK_E_DISCONNECTED: 228 return ("DISCONNECTED"); 229 default: 230 return ("INVALID"); 231 } 232 } 233 234 const char * 235 g_raid_volume_state2str(int state) 236 { 237 238 switch (state) { 239 case G_RAID_VOLUME_S_STARTING: 240 return ("STARTING"); 241 case G_RAID_VOLUME_S_BROKEN: 242 return ("BROKEN"); 243 case G_RAID_VOLUME_S_DEGRADED: 244 return ("DEGRADED"); 245 case G_RAID_VOLUME_S_SUBOPTIMAL: 246 return ("SUBOPTIMAL"); 247 case G_RAID_VOLUME_S_OPTIMAL: 248 return ("OPTIMAL"); 249 case G_RAID_VOLUME_S_UNSUPPORTED: 250 return ("UNSUPPORTED"); 251 case G_RAID_VOLUME_S_STOPPED: 252 return ("STOPPED"); 253 default: 254 return ("INVALID"); 255 } 256 } 257 258 static const char * 259 g_raid_volume_event2str(int event) 260 { 261 262 switch (event) { 263 case G_RAID_VOLUME_E_UP: 264 return ("UP"); 265 case G_RAID_VOLUME_E_DOWN: 266 return ("DOWN"); 267 case G_RAID_VOLUME_E_START: 268 return ("START"); 269 case G_RAID_VOLUME_E_STARTMD: 270 return ("STARTMD"); 271 default: 272 return ("INVALID"); 273 } 274 } 275 276 const char * 277 g_raid_volume_level2str(int level, int qual) 278 { 279 280 switch (level) { 281 case G_RAID_VOLUME_RL_RAID0: 282 return ("RAID0"); 283 case G_RAID_VOLUME_RL_RAID1: 284 return ("RAID1"); 285 case G_RAID_VOLUME_RL_RAID3: 286 if (qual == G_RAID_VOLUME_RLQ_R3P0) 287 return ("RAID3-P0"); 288 if (qual == G_RAID_VOLUME_RLQ_R3PN) 289 return ("RAID3-PN"); 290 return ("RAID3"); 291 case G_RAID_VOLUME_RL_RAID4: 292 if (qual == G_RAID_VOLUME_RLQ_R4P0) 293 return ("RAID4-P0"); 294 if (qual == G_RAID_VOLUME_RLQ_R4PN) 295 return ("RAID4-PN"); 296 return ("RAID4"); 297 case G_RAID_VOLUME_RL_RAID5: 298 if (qual == G_RAID_VOLUME_RLQ_R5RA) 299 return ("RAID5-RA"); 300 if (qual == G_RAID_VOLUME_RLQ_R5RS) 301 return ("RAID5-RS"); 302 if (qual == G_RAID_VOLUME_RLQ_R5LA) 303 return ("RAID5-LA"); 304 if (qual == G_RAID_VOLUME_RLQ_R5LS) 305 return ("RAID5-LS"); 306 return ("RAID5"); 307 case G_RAID_VOLUME_RL_RAID6: 308 if (qual == G_RAID_VOLUME_RLQ_R6RA) 309 return ("RAID6-RA"); 310 if (qual == G_RAID_VOLUME_RLQ_R6RS) 311 return ("RAID6-RS"); 312 if (qual == G_RAID_VOLUME_RLQ_R6LA) 313 return ("RAID6-LA"); 314 if (qual == G_RAID_VOLUME_RLQ_R6LS) 315 return ("RAID6-LS"); 316 return ("RAID6"); 317 case G_RAID_VOLUME_RL_RAIDMDF: 318 if (qual == G_RAID_VOLUME_RLQ_RMDFRA) 319 return ("RAIDMDF-RA"); 320 if (qual == G_RAID_VOLUME_RLQ_RMDFRS) 321 return ("RAIDMDF-RS"); 322 if (qual == G_RAID_VOLUME_RLQ_RMDFLA) 323 return ("RAIDMDF-LA"); 324 if (qual == G_RAID_VOLUME_RLQ_RMDFLS) 325 return ("RAIDMDF-LS"); 326 return ("RAIDMDF"); 327 case G_RAID_VOLUME_RL_RAID1E: 
328 if (qual == G_RAID_VOLUME_RLQ_R1EA) 329 return ("RAID1E-A"); 330 if (qual == G_RAID_VOLUME_RLQ_R1EO) 331 return ("RAID1E-O"); 332 return ("RAID1E"); 333 case G_RAID_VOLUME_RL_SINGLE: 334 return ("SINGLE"); 335 case G_RAID_VOLUME_RL_CONCAT: 336 return ("CONCAT"); 337 case G_RAID_VOLUME_RL_RAID5E: 338 if (qual == G_RAID_VOLUME_RLQ_R5ERA) 339 return ("RAID5E-RA"); 340 if (qual == G_RAID_VOLUME_RLQ_R5ERS) 341 return ("RAID5E-RS"); 342 if (qual == G_RAID_VOLUME_RLQ_R5ELA) 343 return ("RAID5E-LA"); 344 if (qual == G_RAID_VOLUME_RLQ_R5ELS) 345 return ("RAID5E-LS"); 346 return ("RAID5E"); 347 case G_RAID_VOLUME_RL_RAID5EE: 348 if (qual == G_RAID_VOLUME_RLQ_R5EERA) 349 return ("RAID5EE-RA"); 350 if (qual == G_RAID_VOLUME_RLQ_R5EERS) 351 return ("RAID5EE-RS"); 352 if (qual == G_RAID_VOLUME_RLQ_R5EELA) 353 return ("RAID5EE-LA"); 354 if (qual == G_RAID_VOLUME_RLQ_R5EELS) 355 return ("RAID5EE-LS"); 356 return ("RAID5EE"); 357 case G_RAID_VOLUME_RL_RAID5R: 358 if (qual == G_RAID_VOLUME_RLQ_R5RRA) 359 return ("RAID5R-RA"); 360 if (qual == G_RAID_VOLUME_RLQ_R5RRS) 361 return ("RAID5R-RS"); 362 if (qual == G_RAID_VOLUME_RLQ_R5RLA) 363 return ("RAID5R-LA"); 364 if (qual == G_RAID_VOLUME_RLQ_R5RLS) 365 return ("RAID5R-LS"); 366 return ("RAID5E"); 367 default: 368 return ("UNKNOWN"); 369 } 370 } 371 372 int 373 g_raid_volume_str2level(const char *str, int *level, int *qual) 374 { 375 376 *level = G_RAID_VOLUME_RL_UNKNOWN; 377 *qual = G_RAID_VOLUME_RLQ_NONE; 378 if (strcasecmp(str, "RAID0") == 0) 379 *level = G_RAID_VOLUME_RL_RAID0; 380 else if (strcasecmp(str, "RAID1") == 0) 381 *level = G_RAID_VOLUME_RL_RAID1; 382 else if (strcasecmp(str, "RAID3-P0") == 0) { 383 *level = G_RAID_VOLUME_RL_RAID3; 384 *qual = G_RAID_VOLUME_RLQ_R3P0; 385 } else if (strcasecmp(str, "RAID3-PN") == 0 || 386 strcasecmp(str, "RAID3") == 0) { 387 *level = G_RAID_VOLUME_RL_RAID3; 388 *qual = G_RAID_VOLUME_RLQ_R3PN; 389 } else if (strcasecmp(str, "RAID4-P0") == 0) { 390 *level = G_RAID_VOLUME_RL_RAID4; 391 *qual = G_RAID_VOLUME_RLQ_R4P0; 392 } else if (strcasecmp(str, "RAID4-PN") == 0 || 393 strcasecmp(str, "RAID4") == 0) { 394 *level = G_RAID_VOLUME_RL_RAID4; 395 *qual = G_RAID_VOLUME_RLQ_R4PN; 396 } else if (strcasecmp(str, "RAID5-RA") == 0) { 397 *level = G_RAID_VOLUME_RL_RAID5; 398 *qual = G_RAID_VOLUME_RLQ_R5RA; 399 } else if (strcasecmp(str, "RAID5-RS") == 0) { 400 *level = G_RAID_VOLUME_RL_RAID5; 401 *qual = G_RAID_VOLUME_RLQ_R5RS; 402 } else if (strcasecmp(str, "RAID5") == 0 || 403 strcasecmp(str, "RAID5-LA") == 0) { 404 *level = G_RAID_VOLUME_RL_RAID5; 405 *qual = G_RAID_VOLUME_RLQ_R5LA; 406 } else if (strcasecmp(str, "RAID5-LS") == 0) { 407 *level = G_RAID_VOLUME_RL_RAID5; 408 *qual = G_RAID_VOLUME_RLQ_R5LS; 409 } else if (strcasecmp(str, "RAID6-RA") == 0) { 410 *level = G_RAID_VOLUME_RL_RAID6; 411 *qual = G_RAID_VOLUME_RLQ_R6RA; 412 } else if (strcasecmp(str, "RAID6-RS") == 0) { 413 *level = G_RAID_VOLUME_RL_RAID6; 414 *qual = G_RAID_VOLUME_RLQ_R6RS; 415 } else if (strcasecmp(str, "RAID6") == 0 || 416 strcasecmp(str, "RAID6-LA") == 0) { 417 *level = G_RAID_VOLUME_RL_RAID6; 418 *qual = G_RAID_VOLUME_RLQ_R6LA; 419 } else if (strcasecmp(str, "RAID6-LS") == 0) { 420 *level = G_RAID_VOLUME_RL_RAID6; 421 *qual = G_RAID_VOLUME_RLQ_R6LS; 422 } else if (strcasecmp(str, "RAIDMDF-RA") == 0) { 423 *level = G_RAID_VOLUME_RL_RAIDMDF; 424 *qual = G_RAID_VOLUME_RLQ_RMDFRA; 425 } else if (strcasecmp(str, "RAIDMDF-RS") == 0) { 426 *level = G_RAID_VOLUME_RL_RAIDMDF; 427 *qual = G_RAID_VOLUME_RLQ_RMDFRS; 428 } else if (strcasecmp(str, 
"RAIDMDF") == 0 || 429 strcasecmp(str, "RAIDMDF-LA") == 0) { 430 *level = G_RAID_VOLUME_RL_RAIDMDF; 431 *qual = G_RAID_VOLUME_RLQ_RMDFLA; 432 } else if (strcasecmp(str, "RAIDMDF-LS") == 0) { 433 *level = G_RAID_VOLUME_RL_RAIDMDF; 434 *qual = G_RAID_VOLUME_RLQ_RMDFLS; 435 } else if (strcasecmp(str, "RAID10") == 0 || 436 strcasecmp(str, "RAID1E") == 0 || 437 strcasecmp(str, "RAID1E-A") == 0) { 438 *level = G_RAID_VOLUME_RL_RAID1E; 439 *qual = G_RAID_VOLUME_RLQ_R1EA; 440 } else if (strcasecmp(str, "RAID1E-O") == 0) { 441 *level = G_RAID_VOLUME_RL_RAID1E; 442 *qual = G_RAID_VOLUME_RLQ_R1EO; 443 } else if (strcasecmp(str, "SINGLE") == 0) 444 *level = G_RAID_VOLUME_RL_SINGLE; 445 else if (strcasecmp(str, "CONCAT") == 0) 446 *level = G_RAID_VOLUME_RL_CONCAT; 447 else if (strcasecmp(str, "RAID5E-RA") == 0) { 448 *level = G_RAID_VOLUME_RL_RAID5E; 449 *qual = G_RAID_VOLUME_RLQ_R5ERA; 450 } else if (strcasecmp(str, "RAID5E-RS") == 0) { 451 *level = G_RAID_VOLUME_RL_RAID5E; 452 *qual = G_RAID_VOLUME_RLQ_R5ERS; 453 } else if (strcasecmp(str, "RAID5E") == 0 || 454 strcasecmp(str, "RAID5E-LA") == 0) { 455 *level = G_RAID_VOLUME_RL_RAID5E; 456 *qual = G_RAID_VOLUME_RLQ_R5ELA; 457 } else if (strcasecmp(str, "RAID5E-LS") == 0) { 458 *level = G_RAID_VOLUME_RL_RAID5E; 459 *qual = G_RAID_VOLUME_RLQ_R5ELS; 460 } else if (strcasecmp(str, "RAID5EE-RA") == 0) { 461 *level = G_RAID_VOLUME_RL_RAID5EE; 462 *qual = G_RAID_VOLUME_RLQ_R5EERA; 463 } else if (strcasecmp(str, "RAID5EE-RS") == 0) { 464 *level = G_RAID_VOLUME_RL_RAID5EE; 465 *qual = G_RAID_VOLUME_RLQ_R5EERS; 466 } else if (strcasecmp(str, "RAID5EE") == 0 || 467 strcasecmp(str, "RAID5EE-LA") == 0) { 468 *level = G_RAID_VOLUME_RL_RAID5EE; 469 *qual = G_RAID_VOLUME_RLQ_R5EELA; 470 } else if (strcasecmp(str, "RAID5EE-LS") == 0) { 471 *level = G_RAID_VOLUME_RL_RAID5EE; 472 *qual = G_RAID_VOLUME_RLQ_R5EELS; 473 } else if (strcasecmp(str, "RAID5R-RA") == 0) { 474 *level = G_RAID_VOLUME_RL_RAID5R; 475 *qual = G_RAID_VOLUME_RLQ_R5RRA; 476 } else if (strcasecmp(str, "RAID5R-RS") == 0) { 477 *level = G_RAID_VOLUME_RL_RAID5R; 478 *qual = G_RAID_VOLUME_RLQ_R5RRS; 479 } else if (strcasecmp(str, "RAID5R") == 0 || 480 strcasecmp(str, "RAID5R-LA") == 0) { 481 *level = G_RAID_VOLUME_RL_RAID5R; 482 *qual = G_RAID_VOLUME_RLQ_R5RLA; 483 } else if (strcasecmp(str, "RAID5R-LS") == 0) { 484 *level = G_RAID_VOLUME_RL_RAID5R; 485 *qual = G_RAID_VOLUME_RLQ_R5RLS; 486 } else 487 return (-1); 488 return (0); 489 } 490 491 const char * 492 g_raid_get_diskname(struct g_raid_disk *disk) 493 { 494 495 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 496 return ("[unknown]"); 497 return (disk->d_consumer->provider->name); 498 } 499 500 void 501 g_raid_report_disk_state(struct g_raid_disk *disk) 502 { 503 struct g_raid_subdisk *sd; 504 int len, state; 505 uint32_t s; 506 507 if (disk->d_consumer == NULL) 508 return; 509 if (disk->d_state == G_RAID_DISK_S_FAILED || 510 disk->d_state == G_RAID_DISK_S_STALE_FAILED) { 511 s = G_STATE_FAILED; 512 } else { 513 state = G_RAID_SUBDISK_S_ACTIVE; 514 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 515 if (sd->sd_state < state) 516 state = sd->sd_state; 517 } 518 if (state == G_RAID_SUBDISK_S_FAILED) 519 s = G_STATE_FAILED; 520 else if (state == G_RAID_SUBDISK_S_NEW || 521 state == G_RAID_SUBDISK_S_REBUILD) 522 s = G_STATE_REBUILD; 523 else if (state == G_RAID_SUBDISK_S_STALE || 524 state == G_RAID_SUBDISK_S_RESYNC) 525 s = G_STATE_RESYNC; 526 else 527 s = G_STATE_ACTIVE; 528 } 529 len = sizeof(s); 530 g_io_getattr("GEOM::setstate", 
disk->d_consumer, &len, &s); 531 G_RAID_DEBUG1(2, disk->d_softc, "Disk %s state reported as %d.", 532 g_raid_get_diskname(disk), s); 533 } 534 535 void 536 g_raid_change_disk_state(struct g_raid_disk *disk, int state) 537 { 538 539 G_RAID_DEBUG1(0, disk->d_softc, "Disk %s state changed from %s to %s.", 540 g_raid_get_diskname(disk), 541 g_raid_disk_state2str(disk->d_state), 542 g_raid_disk_state2str(state)); 543 disk->d_state = state; 544 g_raid_report_disk_state(disk); 545 } 546 547 void 548 g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state) 549 { 550 551 G_RAID_DEBUG1(0, sd->sd_softc, 552 "Subdisk %s:%d-%s state changed from %s to %s.", 553 sd->sd_volume->v_name, sd->sd_pos, 554 sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]", 555 g_raid_subdisk_state2str(sd->sd_state), 556 g_raid_subdisk_state2str(state)); 557 sd->sd_state = state; 558 if (sd->sd_disk) 559 g_raid_report_disk_state(sd->sd_disk); 560 } 561 562 void 563 g_raid_change_volume_state(struct g_raid_volume *vol, int state) 564 { 565 566 G_RAID_DEBUG1(0, vol->v_softc, 567 "Volume %s state changed from %s to %s.", 568 vol->v_name, 569 g_raid_volume_state2str(vol->v_state), 570 g_raid_volume_state2str(state)); 571 vol->v_state = state; 572 } 573 574 /* 575 * --- Events handling functions --- 576 * Events in geom_raid are used to maintain subdisks and volumes status 577 * from one thread to simplify locking. 578 */ 579 static void 580 g_raid_event_free(struct g_raid_event *ep) 581 { 582 583 free(ep, M_RAID); 584 } 585 586 int 587 g_raid_event_send(void *arg, int event, int flags) 588 { 589 struct g_raid_softc *sc; 590 struct g_raid_event *ep; 591 int error; 592 593 if ((flags & G_RAID_EVENT_VOLUME) != 0) { 594 sc = ((struct g_raid_volume *)arg)->v_softc; 595 } else if ((flags & G_RAID_EVENT_DISK) != 0) { 596 sc = ((struct g_raid_disk *)arg)->d_softc; 597 } else if ((flags & G_RAID_EVENT_SUBDISK) != 0) { 598 sc = ((struct g_raid_subdisk *)arg)->sd_softc; 599 } else { 600 sc = arg; 601 } 602 ep = malloc(sizeof(*ep), M_RAID, 603 sx_xlocked(&sc->sc_lock) ? M_WAITOK : M_NOWAIT); 604 if (ep == NULL) 605 return (ENOMEM); 606 ep->e_tgt = arg; 607 ep->e_event = event; 608 ep->e_flags = flags; 609 ep->e_error = 0; 610 G_RAID_DEBUG1(4, sc, "Sending event %p. 
Waking up %p.", ep, sc); 611 mtx_lock(&sc->sc_queue_mtx); 612 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 613 mtx_unlock(&sc->sc_queue_mtx); 614 wakeup(sc); 615 616 if ((flags & G_RAID_EVENT_WAIT) == 0) 617 return (0); 618 619 sx_assert(&sc->sc_lock, SX_XLOCKED); 620 G_RAID_DEBUG1(4, sc, "Sleeping on %p.", ep); 621 sx_xunlock(&sc->sc_lock); 622 while ((ep->e_flags & G_RAID_EVENT_DONE) == 0) { 623 mtx_lock(&sc->sc_queue_mtx); 624 MSLEEP(error, ep, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:event", 625 hz * 5); 626 } 627 error = ep->e_error; 628 g_raid_event_free(ep); 629 sx_xlock(&sc->sc_lock); 630 return (error); 631 } 632 633 static void 634 g_raid_event_cancel(struct g_raid_softc *sc, void *tgt) 635 { 636 struct g_raid_event *ep, *tmpep; 637 638 sx_assert(&sc->sc_lock, SX_XLOCKED); 639 640 mtx_lock(&sc->sc_queue_mtx); 641 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 642 if (ep->e_tgt != tgt) 643 continue; 644 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 645 if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) 646 g_raid_event_free(ep); 647 else { 648 ep->e_error = ECANCELED; 649 wakeup(ep); 650 } 651 } 652 mtx_unlock(&sc->sc_queue_mtx); 653 } 654 655 static int 656 g_raid_event_check(struct g_raid_softc *sc, void *tgt) 657 { 658 struct g_raid_event *ep; 659 int res = 0; 660 661 sx_assert(&sc->sc_lock, SX_XLOCKED); 662 663 mtx_lock(&sc->sc_queue_mtx); 664 TAILQ_FOREACH(ep, &sc->sc_events, e_next) { 665 if (ep->e_tgt != tgt) 666 continue; 667 res = 1; 668 break; 669 } 670 mtx_unlock(&sc->sc_queue_mtx); 671 return (res); 672 } 673 674 /* 675 * Return the number of disks in given state. 676 * If state is equal to -1, count all connected disks. 677 */ 678 u_int 679 g_raid_ndisks(struct g_raid_softc *sc, int state) 680 { 681 struct g_raid_disk *disk; 682 u_int n; 683 684 sx_assert(&sc->sc_lock, SX_LOCKED); 685 686 n = 0; 687 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 688 if (disk->d_state == state || state == -1) 689 n++; 690 } 691 return (n); 692 } 693 694 /* 695 * Return the number of subdisks in given state. 696 * If state is equal to -1, count all connected disks. 697 */ 698 u_int 699 g_raid_nsubdisks(struct g_raid_volume *vol, int state) 700 { 701 struct g_raid_subdisk *subdisk; 702 struct g_raid_softc *sc; 703 u_int i, n ; 704 705 sc = vol->v_softc; 706 sx_assert(&sc->sc_lock, SX_LOCKED); 707 708 n = 0; 709 for (i = 0; i < vol->v_disks_count; i++) { 710 subdisk = &vol->v_subdisks[i]; 711 if ((state == -1 && 712 subdisk->sd_state != G_RAID_SUBDISK_S_NONE) || 713 subdisk->sd_state == state) 714 n++; 715 } 716 return (n); 717 } 718 719 /* 720 * Return the first subdisk in given state. 721 * If state is equal to -1, then the first connected disks. 
722 */ 723 struct g_raid_subdisk * 724 g_raid_get_subdisk(struct g_raid_volume *vol, int state) 725 { 726 struct g_raid_subdisk *sd; 727 struct g_raid_softc *sc; 728 u_int i; 729 730 sc = vol->v_softc; 731 sx_assert(&sc->sc_lock, SX_LOCKED); 732 733 for (i = 0; i < vol->v_disks_count; i++) { 734 sd = &vol->v_subdisks[i]; 735 if ((state == -1 && 736 sd->sd_state != G_RAID_SUBDISK_S_NONE) || 737 sd->sd_state == state) 738 return (sd); 739 } 740 return (NULL); 741 } 742 743 struct g_consumer * 744 g_raid_open_consumer(struct g_raid_softc *sc, const char *name) 745 { 746 struct g_consumer *cp; 747 struct g_provider *pp; 748 749 g_topology_assert(); 750 751 if (strncmp(name, "/dev/", 5) == 0) 752 name += 5; 753 pp = g_provider_by_name(name); 754 if (pp == NULL) 755 return (NULL); 756 cp = g_new_consumer(sc->sc_geom); 757 if (g_attach(cp, pp) != 0) { 758 g_destroy_consumer(cp); 759 return (NULL); 760 } 761 if (g_access(cp, 1, 1, 1) != 0) { 762 g_detach(cp); 763 g_destroy_consumer(cp); 764 return (NULL); 765 } 766 return (cp); 767 } 768 769 static u_int 770 g_raid_nrequests(struct g_raid_softc *sc, struct g_consumer *cp) 771 { 772 struct bio *bp; 773 u_int nreqs = 0; 774 775 mtx_lock(&sc->sc_queue_mtx); 776 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 777 if (bp->bio_from == cp) 778 nreqs++; 779 } 780 mtx_unlock(&sc->sc_queue_mtx); 781 return (nreqs); 782 } 783 784 u_int 785 g_raid_nopens(struct g_raid_softc *sc) 786 { 787 struct g_raid_volume *vol; 788 u_int opens; 789 790 opens = 0; 791 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 792 if (vol->v_provider_open != 0) 793 opens++; 794 } 795 return (opens); 796 } 797 798 static int 799 g_raid_consumer_is_busy(struct g_raid_softc *sc, struct g_consumer *cp) 800 { 801 802 if (cp->index > 0) { 803 G_RAID_DEBUG1(2, sc, 804 "I/O requests for %s exist, can't destroy it now.", 805 cp->provider->name); 806 return (1); 807 } 808 if (g_raid_nrequests(sc, cp) > 0) { 809 G_RAID_DEBUG1(2, sc, 810 "I/O requests for %s in queue, can't destroy it now.", 811 cp->provider->name); 812 return (1); 813 } 814 return (0); 815 } 816 817 static void 818 g_raid_destroy_consumer(void *arg, int flags __unused) 819 { 820 struct g_consumer *cp; 821 822 g_topology_assert(); 823 824 cp = arg; 825 G_RAID_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 826 g_detach(cp); 827 g_destroy_consumer(cp); 828 } 829 830 void 831 g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp) 832 { 833 struct g_provider *pp; 834 int retaste_wait; 835 836 g_topology_assert_not(); 837 838 g_topology_lock(); 839 cp->private = NULL; 840 if (g_raid_consumer_is_busy(sc, cp)) 841 goto out; 842 pp = cp->provider; 843 retaste_wait = 0; 844 if (cp->acw == 1) { 845 if ((pp->geom->flags & G_GEOM_WITHER) == 0) 846 retaste_wait = 1; 847 } 848 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 849 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 850 if (retaste_wait) { 851 /* 852 * After retaste event was send (inside g_access()), we can send 853 * event to detach and destroy consumer. 854 * A class, which has consumer to the given provider connected 855 * will not receive retaste event for the provider. 856 * This is the way how I ignore retaste events when I close 857 * consumers opened for write: I detach and destroy consumer 858 * after retaste event is sent. 
859 */ 860 g_post_event(g_raid_destroy_consumer, cp, M_WAITOK, NULL); 861 goto out; 862 } 863 G_RAID_DEBUG(1, "Consumer %s destroyed.", pp->name); 864 g_detach(cp); 865 g_destroy_consumer(cp); 866 out: 867 g_topology_unlock(); 868 } 869 870 static void 871 g_raid_orphan(struct g_consumer *cp) 872 { 873 struct g_raid_disk *disk; 874 875 g_topology_assert(); 876 877 disk = cp->private; 878 if (disk == NULL) 879 return; 880 g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED, 881 G_RAID_EVENT_DISK); 882 } 883 884 static int 885 g_raid_clean(struct g_raid_volume *vol, int acw) 886 { 887 struct g_raid_softc *sc; 888 int timeout; 889 890 sc = vol->v_softc; 891 g_topology_assert_not(); 892 sx_assert(&sc->sc_lock, SX_XLOCKED); 893 894 // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0) 895 // return (0); 896 if (!vol->v_dirty) 897 return (0); 898 if (vol->v_writes > 0) 899 return (0); 900 if (acw > 0 || (acw == -1 && 901 vol->v_provider != NULL && vol->v_provider->acw > 0)) { 902 timeout = g_raid_clean_time - (time_uptime - vol->v_last_write); 903 if (timeout > 0) 904 return (timeout); 905 } 906 vol->v_dirty = 0; 907 G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.", 908 vol->v_name); 909 g_raid_write_metadata(sc, vol, NULL, NULL); 910 return (0); 911 } 912 913 static void 914 g_raid_dirty(struct g_raid_volume *vol) 915 { 916 struct g_raid_softc *sc; 917 918 sc = vol->v_softc; 919 g_topology_assert_not(); 920 sx_assert(&sc->sc_lock, SX_XLOCKED); 921 922 // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0) 923 // return; 924 vol->v_dirty = 1; 925 G_RAID_DEBUG1(1, sc, "Volume %s marked as dirty.", 926 vol->v_name); 927 g_raid_write_metadata(sc, vol, NULL, NULL); 928 } 929 930 void 931 g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp) 932 { 933 struct g_raid_softc *sc; 934 struct g_raid_volume *vol; 935 struct g_raid_subdisk *sd; 936 struct bio_queue_head queue; 937 struct bio *cbp; 938 int i; 939 940 vol = tr->tro_volume; 941 sc = vol->v_softc; 942 943 /* 944 * Allocate all bios before sending any request, so we can return 945 * ENOMEM in nice and clean way. 
946 */ 947 bioq_init(&queue); 948 for (i = 0; i < vol->v_disks_count; i++) { 949 sd = &vol->v_subdisks[i]; 950 if (sd->sd_state == G_RAID_SUBDISK_S_NONE || 951 sd->sd_state == G_RAID_SUBDISK_S_FAILED) 952 continue; 953 cbp = g_clone_bio(bp); 954 if (cbp == NULL) 955 goto failure; 956 cbp->bio_caller1 = sd; 957 bioq_insert_tail(&queue, cbp); 958 } 959 for (cbp = bioq_first(&queue); cbp != NULL; 960 cbp = bioq_first(&queue)) { 961 bioq_remove(&queue, cbp); 962 sd = cbp->bio_caller1; 963 cbp->bio_caller1 = NULL; 964 g_raid_subdisk_iostart(sd, cbp); 965 } 966 return; 967 failure: 968 for (cbp = bioq_first(&queue); cbp != NULL; 969 cbp = bioq_first(&queue)) { 970 bioq_remove(&queue, cbp); 971 g_destroy_bio(cbp); 972 } 973 if (bp->bio_error == 0) 974 bp->bio_error = ENOMEM; 975 g_raid_iodone(bp, bp->bio_error); 976 } 977 978 static void 979 g_raid_tr_kerneldump_common_done(struct bio *bp) 980 { 981 982 bp->bio_flags |= BIO_DONE; 983 } 984 985 int 986 g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr, 987 void *virtual, vm_offset_t physical, off_t offset, size_t length) 988 { 989 struct g_raid_softc *sc; 990 struct g_raid_volume *vol; 991 struct bio bp; 992 993 vol = tr->tro_volume; 994 sc = vol->v_softc; 995 996 bzero(&bp, sizeof(bp)); 997 bp.bio_cmd = BIO_WRITE; 998 bp.bio_done = g_raid_tr_kerneldump_common_done; 999 bp.bio_attribute = NULL; 1000 bp.bio_offset = offset; 1001 bp.bio_length = length; 1002 bp.bio_data = virtual; 1003 bp.bio_to = vol->v_provider; 1004 1005 g_raid_start(&bp); 1006 while (!(bp.bio_flags & BIO_DONE)) { 1007 G_RAID_DEBUG1(4, sc, "Poll..."); 1008 g_raid_poll(sc); 1009 DELAY(10); 1010 } 1011 1012 return (bp.bio_error != 0 ? EIO : 0); 1013 } 1014 1015 static int 1016 g_raid_dump(void *arg, 1017 void *virtual, vm_offset_t physical, off_t offset, size_t length) 1018 { 1019 struct g_raid_volume *vol; 1020 int error; 1021 1022 vol = (struct g_raid_volume *)arg; 1023 G_RAID_DEBUG1(3, vol->v_softc, "Dumping at off %llu len %llu.", 1024 (long long unsigned)offset, (long long unsigned)length); 1025 1026 error = G_RAID_TR_KERNELDUMP(vol->v_tr, 1027 virtual, physical, offset, length); 1028 return (error); 1029 } 1030 1031 static void 1032 g_raid_kerneldump(struct g_raid_softc *sc, struct bio *bp) 1033 { 1034 struct g_kerneldump *gkd; 1035 struct g_provider *pp; 1036 struct g_raid_volume *vol; 1037 1038 gkd = (struct g_kerneldump*)bp->bio_data; 1039 pp = bp->bio_to; 1040 vol = pp->private; 1041 g_trace(G_T_TOPOLOGY, "g_raid_kerneldump(%s, %jd, %jd)", 1042 pp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); 1043 gkd->di.dumper = g_raid_dump; 1044 gkd->di.priv = vol; 1045 gkd->di.blocksize = vol->v_sectorsize; 1046 gkd->di.maxiosize = DFLTPHYS; 1047 gkd->di.mediaoffset = gkd->offset; 1048 if ((gkd->offset + gkd->length) > vol->v_mediasize) 1049 gkd->length = vol->v_mediasize - gkd->offset; 1050 gkd->di.mediasize = gkd->length; 1051 g_io_deliver(bp, 0); 1052 } 1053 1054 static void 1055 g_raid_start(struct bio *bp) 1056 { 1057 struct g_raid_softc *sc; 1058 1059 sc = bp->bio_to->geom->softc; 1060 /* 1061 * If sc == NULL or there are no valid disks, provider's error 1062 * should be set and g_raid_start() should not be called at all. 
1063 */ 1064 // KASSERT(sc != NULL && sc->sc_state == G_RAID_VOLUME_S_RUNNING, 1065 // ("Provider's error should be set (error=%d)(mirror=%s).", 1066 // bp->bio_to->error, bp->bio_to->name)); 1067 G_RAID_LOGREQ(3, bp, "Request received."); 1068 1069 switch (bp->bio_cmd) { 1070 case BIO_READ: 1071 case BIO_WRITE: 1072 case BIO_DELETE: 1073 case BIO_FLUSH: 1074 break; 1075 case BIO_GETATTR: 1076 if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) 1077 g_raid_kerneldump(sc, bp); 1078 else 1079 g_io_deliver(bp, EOPNOTSUPP); 1080 return; 1081 default: 1082 g_io_deliver(bp, EOPNOTSUPP); 1083 return; 1084 } 1085 mtx_lock(&sc->sc_queue_mtx); 1086 bioq_disksort(&sc->sc_queue, bp); 1087 mtx_unlock(&sc->sc_queue_mtx); 1088 if (!dumping) { 1089 G_RAID_DEBUG1(4, sc, "Waking up %p.", sc); 1090 wakeup(sc); 1091 } 1092 } 1093 1094 static int 1095 g_raid_bio_overlaps(const struct bio *bp, off_t lstart, off_t len) 1096 { 1097 /* 1098 * 5 cases: 1099 * (1) bp entirely below NO 1100 * (2) bp entirely above NO 1101 * (3) bp start below, but end in range YES 1102 * (4) bp entirely within YES 1103 * (5) bp starts within, ends above YES 1104 * 1105 * lock range 10-19 (offset 10 length 10) 1106 * (1) 1-5: first if kicks it out 1107 * (2) 30-35: second if kicks it out 1108 * (3) 5-15: passes both ifs 1109 * (4) 12-14: passes both ifs 1110 * (5) 19-20: passes both 1111 */ 1112 off_t lend = lstart + len - 1; 1113 off_t bstart = bp->bio_offset; 1114 off_t bend = bp->bio_offset + bp->bio_length - 1; 1115 1116 if (bend < lstart) 1117 return (0); 1118 if (lend < bstart) 1119 return (0); 1120 return (1); 1121 } 1122 1123 static int 1124 g_raid_is_in_locked_range(struct g_raid_volume *vol, const struct bio *bp) 1125 { 1126 struct g_raid_lock *lp; 1127 1128 sx_assert(&vol->v_softc->sc_lock, SX_LOCKED); 1129 1130 LIST_FOREACH(lp, &vol->v_locks, l_next) { 1131 if (g_raid_bio_overlaps(bp, lp->l_offset, lp->l_length)) 1132 return (1); 1133 } 1134 return (0); 1135 } 1136 1137 static void 1138 g_raid_start_request(struct bio *bp) 1139 { 1140 struct g_raid_softc *sc; 1141 struct g_raid_volume *vol; 1142 1143 sc = bp->bio_to->geom->softc; 1144 sx_assert(&sc->sc_lock, SX_LOCKED); 1145 vol = bp->bio_to->private; 1146 1147 /* 1148 * Check to see if this item is in a locked range. If so, 1149 * queue it to our locked queue and return. We'll requeue 1150 * it when the range is unlocked. Internal I/O for the 1151 * rebuild/rescan/recovery process is excluded from this 1152 * check so we can actually do the recovery. 1153 */ 1154 if (!(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL) && 1155 g_raid_is_in_locked_range(vol, bp)) { 1156 G_RAID_LOGREQ(3, bp, "Defer request."); 1157 bioq_insert_tail(&vol->v_locked, bp); 1158 return; 1159 } 1160 1161 /* 1162 * If we're actually going to do the write/delete, then 1163 * update the idle stats for the volume. 1164 */ 1165 if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) { 1166 if (!vol->v_dirty) 1167 g_raid_dirty(vol); 1168 vol->v_writes++; 1169 } 1170 1171 /* 1172 * Put request onto inflight queue, so we can check if new 1173 * synchronization requests don't collide with it. Then tell 1174 * the transformation layer to start the I/O. 
1175 */ 1176 bioq_insert_tail(&vol->v_inflight, bp); 1177 G_RAID_LOGREQ(4, bp, "Request started"); 1178 G_RAID_TR_IOSTART(vol->v_tr, bp); 1179 } 1180 1181 static void 1182 g_raid_finish_with_locked_ranges(struct g_raid_volume *vol, struct bio *bp) 1183 { 1184 off_t off, len; 1185 struct bio *nbp; 1186 struct g_raid_lock *lp; 1187 1188 vol->v_pending_lock = 0; 1189 LIST_FOREACH(lp, &vol->v_locks, l_next) { 1190 if (lp->l_pending) { 1191 off = lp->l_offset; 1192 len = lp->l_length; 1193 lp->l_pending = 0; 1194 TAILQ_FOREACH(nbp, &vol->v_inflight.queue, bio_queue) { 1195 if (g_raid_bio_overlaps(nbp, off, len)) 1196 lp->l_pending++; 1197 } 1198 if (lp->l_pending) { 1199 vol->v_pending_lock = 1; 1200 G_RAID_DEBUG1(4, vol->v_softc, 1201 "Deferred lock(%jd, %jd) has %d pending", 1202 (intmax_t)off, (intmax_t)(off + len), 1203 lp->l_pending); 1204 continue; 1205 } 1206 G_RAID_DEBUG1(4, vol->v_softc, 1207 "Deferred lock of %jd to %jd completed", 1208 (intmax_t)off, (intmax_t)(off + len)); 1209 G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg); 1210 } 1211 } 1212 } 1213 1214 void 1215 g_raid_iodone(struct bio *bp, int error) 1216 { 1217 struct g_raid_softc *sc; 1218 struct g_raid_volume *vol; 1219 1220 sc = bp->bio_to->geom->softc; 1221 sx_assert(&sc->sc_lock, SX_LOCKED); 1222 vol = bp->bio_to->private; 1223 G_RAID_LOGREQ(3, bp, "Request done: %d.", error); 1224 1225 /* Update stats if we done write/delete. */ 1226 if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) { 1227 vol->v_writes--; 1228 vol->v_last_write = time_uptime; 1229 } 1230 1231 bioq_remove(&vol->v_inflight, bp); 1232 if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp)) 1233 g_raid_finish_with_locked_ranges(vol, bp); 1234 getmicrouptime(&vol->v_last_done); 1235 g_io_deliver(bp, error); 1236 } 1237 1238 int 1239 g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len, 1240 struct bio *ignore, void *argp) 1241 { 1242 struct g_raid_softc *sc; 1243 struct g_raid_lock *lp; 1244 struct bio *bp; 1245 1246 sc = vol->v_softc; 1247 lp = malloc(sizeof(*lp), M_RAID, M_WAITOK | M_ZERO); 1248 LIST_INSERT_HEAD(&vol->v_locks, lp, l_next); 1249 lp->l_offset = off; 1250 lp->l_length = len; 1251 lp->l_callback_arg = argp; 1252 1253 lp->l_pending = 0; 1254 TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) { 1255 if (bp != ignore && g_raid_bio_overlaps(bp, off, len)) 1256 lp->l_pending++; 1257 } 1258 1259 /* 1260 * If there are any writes that are pending, we return EBUSY. All 1261 * callers will have to wait until all pending writes clear. 1262 */ 1263 if (lp->l_pending > 0) { 1264 vol->v_pending_lock = 1; 1265 G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd deferred %d pend", 1266 (intmax_t)off, (intmax_t)(off+len), lp->l_pending); 1267 return (EBUSY); 1268 } 1269 G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd", 1270 (intmax_t)off, (intmax_t)(off+len)); 1271 G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg); 1272 return (0); 1273 } 1274 1275 int 1276 g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len) 1277 { 1278 struct g_raid_lock *lp; 1279 struct g_raid_softc *sc; 1280 struct bio *bp; 1281 1282 sc = vol->v_softc; 1283 LIST_FOREACH(lp, &vol->v_locks, l_next) { 1284 if (lp->l_offset == off && lp->l_length == len) { 1285 LIST_REMOVE(lp, l_next); 1286 /* XXX 1287 * Right now we just put them all back on the queue 1288 * and hope for the best. We hope this because any 1289 * locked ranges will go right back on this list 1290 * when the worker thread runs. 
1291 * XXX 1292 */ 1293 G_RAID_DEBUG1(4, sc, "Unlocked %jd to %jd", 1294 (intmax_t)lp->l_offset, 1295 (intmax_t)(lp->l_offset+lp->l_length)); 1296 mtx_lock(&sc->sc_queue_mtx); 1297 while ((bp = bioq_takefirst(&vol->v_locked)) != NULL) 1298 bioq_disksort(&sc->sc_queue, bp); 1299 mtx_unlock(&sc->sc_queue_mtx); 1300 free(lp, M_RAID); 1301 return (0); 1302 } 1303 } 1304 return (EINVAL); 1305 } 1306 1307 void 1308 g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp) 1309 { 1310 struct g_consumer *cp; 1311 struct g_raid_disk *disk, *tdisk; 1312 1313 bp->bio_caller1 = sd; 1314 1315 /* 1316 * Make sure that the disk is present. Generally it is a task of 1317 * transformation layers to not send requests to absent disks, but 1318 * it is better to be safe and report situation then sorry. 1319 */ 1320 if (sd->sd_disk == NULL) { 1321 G_RAID_LOGREQ(0, bp, "Warning! I/O request to an absent disk!"); 1322 nodisk: 1323 bp->bio_from = NULL; 1324 bp->bio_to = NULL; 1325 bp->bio_error = ENXIO; 1326 g_raid_disk_done(bp); 1327 return; 1328 } 1329 disk = sd->sd_disk; 1330 if (disk->d_state != G_RAID_DISK_S_ACTIVE && 1331 disk->d_state != G_RAID_DISK_S_FAILED) { 1332 G_RAID_LOGREQ(0, bp, "Warning! I/O request to a disk in a " 1333 "wrong state (%s)!", g_raid_disk_state2str(disk->d_state)); 1334 goto nodisk; 1335 } 1336 1337 cp = disk->d_consumer; 1338 bp->bio_from = cp; 1339 bp->bio_to = cp->provider; 1340 cp->index++; 1341 1342 /* Update average disks load. */ 1343 TAILQ_FOREACH(tdisk, &sd->sd_softc->sc_disks, d_next) { 1344 if (tdisk->d_consumer == NULL) 1345 tdisk->d_load = 0; 1346 else 1347 tdisk->d_load = (tdisk->d_consumer->index * 1348 G_RAID_SUBDISK_LOAD_SCALE + tdisk->d_load * 7) / 8; 1349 } 1350 1351 disk->d_last_offset = bp->bio_offset + bp->bio_length; 1352 if (dumping) { 1353 G_RAID_LOGREQ(3, bp, "Sending dumping request."); 1354 if (bp->bio_cmd == BIO_WRITE) { 1355 bp->bio_error = g_raid_subdisk_kerneldump(sd, 1356 bp->bio_data, 0, bp->bio_offset, bp->bio_length); 1357 } else 1358 bp->bio_error = EOPNOTSUPP; 1359 g_raid_disk_done(bp); 1360 } else { 1361 bp->bio_done = g_raid_disk_done; 1362 bp->bio_offset += sd->sd_offset; 1363 G_RAID_LOGREQ(3, bp, "Sending request."); 1364 g_io_request(bp, cp); 1365 } 1366 } 1367 1368 int 1369 g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd, 1370 void *virtual, vm_offset_t physical, off_t offset, size_t length) 1371 { 1372 1373 if (sd->sd_disk == NULL) 1374 return (ENXIO); 1375 if (sd->sd_disk->d_kd.di.dumper == NULL) 1376 return (EOPNOTSUPP); 1377 return (dump_write(&sd->sd_disk->d_kd.di, 1378 virtual, physical, 1379 sd->sd_disk->d_kd.di.mediaoffset + sd->sd_offset + offset, 1380 length)); 1381 } 1382 1383 static void 1384 g_raid_disk_done(struct bio *bp) 1385 { 1386 struct g_raid_softc *sc; 1387 struct g_raid_subdisk *sd; 1388 1389 sd = bp->bio_caller1; 1390 sc = sd->sd_softc; 1391 mtx_lock(&sc->sc_queue_mtx); 1392 bioq_disksort(&sc->sc_queue, bp); 1393 mtx_unlock(&sc->sc_queue_mtx); 1394 if (!dumping) 1395 wakeup(sc); 1396 } 1397 1398 static void 1399 g_raid_disk_done_request(struct bio *bp) 1400 { 1401 struct g_raid_softc *sc; 1402 struct g_raid_disk *disk; 1403 struct g_raid_subdisk *sd; 1404 struct g_raid_volume *vol; 1405 1406 g_topology_assert_not(); 1407 1408 G_RAID_LOGREQ(3, bp, "Disk request done: %d.", bp->bio_error); 1409 sd = bp->bio_caller1; 1410 sc = sd->sd_softc; 1411 vol = sd->sd_volume; 1412 if (bp->bio_from != NULL) { 1413 bp->bio_from->index--; 1414 disk = bp->bio_from->private; 1415 if (disk == NULL) 1416 
g_raid_kill_consumer(sc, bp->bio_from); 1417 } 1418 bp->bio_offset -= sd->sd_offset; 1419 1420 G_RAID_TR_IODONE(vol->v_tr, sd, bp); 1421 } 1422 1423 static void 1424 g_raid_handle_event(struct g_raid_softc *sc, struct g_raid_event *ep) 1425 { 1426 1427 if ((ep->e_flags & G_RAID_EVENT_VOLUME) != 0) 1428 ep->e_error = g_raid_update_volume(ep->e_tgt, ep->e_event); 1429 else if ((ep->e_flags & G_RAID_EVENT_DISK) != 0) 1430 ep->e_error = g_raid_update_disk(ep->e_tgt, ep->e_event); 1431 else if ((ep->e_flags & G_RAID_EVENT_SUBDISK) != 0) 1432 ep->e_error = g_raid_update_subdisk(ep->e_tgt, ep->e_event); 1433 else 1434 ep->e_error = g_raid_update_node(ep->e_tgt, ep->e_event); 1435 if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) { 1436 KASSERT(ep->e_error == 0, 1437 ("Error cannot be handled.")); 1438 g_raid_event_free(ep); 1439 } else { 1440 ep->e_flags |= G_RAID_EVENT_DONE; 1441 G_RAID_DEBUG1(4, sc, "Waking up %p.", ep); 1442 mtx_lock(&sc->sc_queue_mtx); 1443 wakeup(ep); 1444 mtx_unlock(&sc->sc_queue_mtx); 1445 } 1446 } 1447 1448 /* 1449 * Worker thread. 1450 */ 1451 static void 1452 g_raid_worker(void *arg) 1453 { 1454 struct g_raid_softc *sc; 1455 struct g_raid_event *ep; 1456 struct g_raid_volume *vol; 1457 struct bio *bp; 1458 struct timeval now, t; 1459 int timeout, rv; 1460 1461 sc = arg; 1462 thread_lock(curthread); 1463 sched_prio(curthread, PRIBIO); 1464 thread_unlock(curthread); 1465 1466 sx_xlock(&sc->sc_lock); 1467 for (;;) { 1468 mtx_lock(&sc->sc_queue_mtx); 1469 /* 1470 * First take a look at events. 1471 * This is important to handle events before any I/O requests. 1472 */ 1473 bp = NULL; 1474 vol = NULL; 1475 rv = 0; 1476 ep = TAILQ_FIRST(&sc->sc_events); 1477 if (ep != NULL) 1478 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 1479 else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) 1480 ; 1481 else { 1482 getmicrouptime(&now); 1483 t = now; 1484 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1485 if (bioq_first(&vol->v_inflight) == NULL && 1486 vol->v_tr && 1487 timevalcmp(&vol->v_last_done, &t, < )) 1488 t = vol->v_last_done; 1489 } 1490 timevalsub(&t, &now); 1491 timeout = g_raid_idle_threshold + 1492 t.tv_sec * 1000000 + t.tv_usec; 1493 if (timeout > 0) { 1494 /* 1495 * Two steps to avoid overflows at HZ=1000 1496 * and idle timeouts > 2.1s. Some rounding 1497 * errors can occur, but they are < 1tick, 1498 * which is deemed to be close enough for 1499 * this purpose. 
1500 */ 1501 int micpertic = 1000000 / hz; 1502 timeout = (timeout + micpertic - 1) / micpertic; 1503 sx_xunlock(&sc->sc_lock); 1504 MSLEEP(rv, sc, &sc->sc_queue_mtx, 1505 PRIBIO | PDROP, "-", timeout); 1506 sx_xlock(&sc->sc_lock); 1507 goto process; 1508 } else 1509 rv = EWOULDBLOCK; 1510 } 1511 mtx_unlock(&sc->sc_queue_mtx); 1512 process: 1513 if (ep != NULL) { 1514 g_raid_handle_event(sc, ep); 1515 } else if (bp != NULL) { 1516 if (bp->bio_to != NULL && 1517 bp->bio_to->geom == sc->sc_geom) 1518 g_raid_start_request(bp); 1519 else 1520 g_raid_disk_done_request(bp); 1521 } else if (rv == EWOULDBLOCK) { 1522 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1523 if (vol->v_writes == 0 && vol->v_dirty) 1524 g_raid_clean(vol, -1); 1525 if (bioq_first(&vol->v_inflight) == NULL && 1526 vol->v_tr) { 1527 t.tv_sec = g_raid_idle_threshold / 1000000; 1528 t.tv_usec = g_raid_idle_threshold % 1000000; 1529 timevaladd(&t, &vol->v_last_done); 1530 getmicrouptime(&now); 1531 if (timevalcmp(&t, &now, <= )) { 1532 G_RAID_TR_IDLE(vol->v_tr); 1533 vol->v_last_done = now; 1534 } 1535 } 1536 } 1537 } 1538 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 1539 g_raid_destroy_node(sc, 1); /* May not return. */ 1540 } 1541 } 1542 1543 static void 1544 g_raid_poll(struct g_raid_softc *sc) 1545 { 1546 struct g_raid_event *ep; 1547 struct bio *bp; 1548 1549 sx_xlock(&sc->sc_lock); 1550 mtx_lock(&sc->sc_queue_mtx); 1551 /* 1552 * First take a look at events. 1553 * This is important to handle events before any I/O requests. 1554 */ 1555 ep = TAILQ_FIRST(&sc->sc_events); 1556 if (ep != NULL) { 1557 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 1558 mtx_unlock(&sc->sc_queue_mtx); 1559 g_raid_handle_event(sc, ep); 1560 goto out; 1561 } 1562 bp = bioq_takefirst(&sc->sc_queue); 1563 if (bp != NULL) { 1564 mtx_unlock(&sc->sc_queue_mtx); 1565 if (bp->bio_from == NULL || 1566 bp->bio_from->geom != sc->sc_geom) 1567 g_raid_start_request(bp); 1568 else 1569 g_raid_disk_done_request(bp); 1570 } 1571 out: 1572 sx_xunlock(&sc->sc_lock); 1573 } 1574 1575 static void 1576 g_raid_launch_provider(struct g_raid_volume *vol) 1577 { 1578 struct g_raid_disk *disk; 1579 struct g_raid_softc *sc; 1580 struct g_provider *pp; 1581 char name[G_RAID_MAX_VOLUMENAME]; 1582 off_t off; 1583 1584 sc = vol->v_softc; 1585 sx_assert(&sc->sc_lock, SX_LOCKED); 1586 1587 g_topology_lock(); 1588 /* Try to name provider with volume name. */ 1589 snprintf(name, sizeof(name), "raid/%s", vol->v_name); 1590 if (g_raid_name_format == 0 || vol->v_name[0] == 0 || 1591 g_provider_by_name(name) != NULL) { 1592 /* Otherwise use sequential volume number. 
*/ 1593 snprintf(name, sizeof(name), "raid/r%d", vol->v_global_id); 1594 } 1595 pp = g_new_providerf(sc->sc_geom, "%s", name); 1596 pp->private = vol; 1597 pp->mediasize = vol->v_mediasize; 1598 pp->sectorsize = vol->v_sectorsize; 1599 pp->stripesize = 0; 1600 pp->stripeoffset = 0; 1601 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1602 vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || 1603 vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE || 1604 vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) { 1605 if ((disk = vol->v_subdisks[0].sd_disk) != NULL && 1606 disk->d_consumer != NULL && 1607 disk->d_consumer->provider != NULL) { 1608 pp->stripesize = disk->d_consumer->provider->stripesize; 1609 off = disk->d_consumer->provider->stripeoffset; 1610 pp->stripeoffset = off + vol->v_subdisks[0].sd_offset; 1611 if (off > 0) 1612 pp->stripeoffset %= off; 1613 } 1614 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) { 1615 pp->stripesize *= (vol->v_disks_count - 1); 1616 pp->stripeoffset *= (vol->v_disks_count - 1); 1617 } 1618 } else 1619 pp->stripesize = vol->v_strip_size; 1620 vol->v_provider = pp; 1621 g_error_provider(pp, 0); 1622 g_topology_unlock(); 1623 G_RAID_DEBUG1(0, sc, "Provider %s for volume %s created.", 1624 pp->name, vol->v_name); 1625 } 1626 1627 static void 1628 g_raid_destroy_provider(struct g_raid_volume *vol) 1629 { 1630 struct g_raid_softc *sc; 1631 struct g_provider *pp; 1632 struct bio *bp, *tmp; 1633 1634 g_topology_assert_not(); 1635 sc = vol->v_softc; 1636 pp = vol->v_provider; 1637 KASSERT(pp != NULL, ("NULL provider (volume=%s).", vol->v_name)); 1638 1639 g_topology_lock(); 1640 g_error_provider(pp, ENXIO); 1641 mtx_lock(&sc->sc_queue_mtx); 1642 TAILQ_FOREACH_SAFE(bp, &sc->sc_queue.queue, bio_queue, tmp) { 1643 if (bp->bio_to != pp) 1644 continue; 1645 bioq_remove(&sc->sc_queue, bp); 1646 g_io_deliver(bp, ENXIO); 1647 } 1648 mtx_unlock(&sc->sc_queue_mtx); 1649 G_RAID_DEBUG1(0, sc, "Provider %s for volume %s destroyed.", 1650 pp->name, vol->v_name); 1651 g_wither_provider(pp, ENXIO); 1652 g_topology_unlock(); 1653 vol->v_provider = NULL; 1654 } 1655 1656 /* 1657 * Update device state. 1658 */ 1659 static int 1660 g_raid_update_volume(struct g_raid_volume *vol, u_int event) 1661 { 1662 struct g_raid_softc *sc; 1663 1664 sc = vol->v_softc; 1665 sx_assert(&sc->sc_lock, SX_XLOCKED); 1666 1667 G_RAID_DEBUG1(2, sc, "Event %s for volume %s.", 1668 g_raid_volume_event2str(event), 1669 vol->v_name); 1670 switch (event) { 1671 case G_RAID_VOLUME_E_DOWN: 1672 if (vol->v_provider != NULL) 1673 g_raid_destroy_provider(vol); 1674 break; 1675 case G_RAID_VOLUME_E_UP: 1676 if (vol->v_provider == NULL) 1677 g_raid_launch_provider(vol); 1678 break; 1679 case G_RAID_VOLUME_E_START: 1680 if (vol->v_tr) 1681 G_RAID_TR_START(vol->v_tr); 1682 return (0); 1683 default: 1684 if (sc->sc_md) 1685 G_RAID_MD_VOLUME_EVENT(sc->sc_md, vol, event); 1686 return (0); 1687 } 1688 1689 /* Manage root mount release. */ 1690 if (vol->v_starting) { 1691 vol->v_starting = 0; 1692 G_RAID_DEBUG1(1, sc, "root_mount_rel %p", vol->v_rootmount); 1693 root_mount_rel(vol->v_rootmount); 1694 vol->v_rootmount = NULL; 1695 } 1696 if (vol->v_stopping && vol->v_provider_open == 0) 1697 g_raid_destroy_volume(vol); 1698 return (0); 1699 } 1700 1701 /* 1702 * Update subdisk state. 
1703 */ 1704 static int 1705 g_raid_update_subdisk(struct g_raid_subdisk *sd, u_int event) 1706 { 1707 struct g_raid_softc *sc; 1708 struct g_raid_volume *vol; 1709 1710 sc = sd->sd_softc; 1711 vol = sd->sd_volume; 1712 sx_assert(&sc->sc_lock, SX_XLOCKED); 1713 1714 G_RAID_DEBUG1(2, sc, "Event %s for subdisk %s:%d-%s.", 1715 g_raid_subdisk_event2str(event), 1716 vol->v_name, sd->sd_pos, 1717 sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); 1718 if (vol->v_tr) 1719 G_RAID_TR_EVENT(vol->v_tr, sd, event); 1720 1721 return (0); 1722 } 1723 1724 /* 1725 * Update disk state. 1726 */ 1727 static int 1728 g_raid_update_disk(struct g_raid_disk *disk, u_int event) 1729 { 1730 struct g_raid_softc *sc; 1731 1732 sc = disk->d_softc; 1733 sx_assert(&sc->sc_lock, SX_XLOCKED); 1734 1735 G_RAID_DEBUG1(2, sc, "Event %s for disk %s.", 1736 g_raid_disk_event2str(event), 1737 g_raid_get_diskname(disk)); 1738 1739 if (sc->sc_md) 1740 G_RAID_MD_EVENT(sc->sc_md, disk, event); 1741 return (0); 1742 } 1743 1744 /* 1745 * Node event. 1746 */ 1747 static int 1748 g_raid_update_node(struct g_raid_softc *sc, u_int event) 1749 { 1750 sx_assert(&sc->sc_lock, SX_XLOCKED); 1751 1752 G_RAID_DEBUG1(2, sc, "Event %s for the array.", 1753 g_raid_node_event2str(event)); 1754 1755 if (event == G_RAID_NODE_E_WAKE) 1756 return (0); 1757 if (sc->sc_md) 1758 G_RAID_MD_EVENT(sc->sc_md, NULL, event); 1759 return (0); 1760 } 1761 1762 static int 1763 g_raid_access(struct g_provider *pp, int acr, int acw, int ace) 1764 { 1765 struct g_raid_volume *vol; 1766 struct g_raid_softc *sc; 1767 int dcw, opens, error = 0; 1768 1769 g_topology_assert(); 1770 sc = pp->geom->softc; 1771 vol = pp->private; 1772 KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 1773 KASSERT(vol != NULL, ("NULL volume (provider=%s).", pp->name)); 1774 1775 G_RAID_DEBUG1(2, sc, "Access request for %s: r%dw%de%d.", pp->name, 1776 acr, acw, ace); 1777 dcw = pp->acw + acw; 1778 1779 g_topology_unlock(); 1780 sx_xlock(&sc->sc_lock); 1781 /* Deny new opens while dying. */ 1782 if (sc->sc_stopping != 0 && (acr > 0 || acw > 0 || ace > 0)) { 1783 error = ENXIO; 1784 goto out; 1785 } 1786 if (dcw == 0 && vol->v_dirty) 1787 g_raid_clean(vol, dcw); 1788 vol->v_provider_open += acr + acw + ace; 1789 /* Handle delayed node destruction. */ 1790 if (sc->sc_stopping == G_RAID_DESTROY_DELAYED && 1791 vol->v_provider_open == 0) { 1792 /* Count open volumes. */ 1793 opens = g_raid_nopens(sc); 1794 if (opens == 0) { 1795 sc->sc_stopping = G_RAID_DESTROY_HARD; 1796 /* Wake up worker to make it selfdestruct. */ 1797 g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); 1798 } 1799 } 1800 /* Handle open volume destruction. 
*/ 1801 if (vol->v_stopping && vol->v_provider_open == 0) 1802 g_raid_destroy_volume(vol); 1803 out: 1804 sx_xunlock(&sc->sc_lock); 1805 g_topology_lock(); 1806 return (error); 1807 } 1808 1809 struct g_raid_softc * 1810 g_raid_create_node(struct g_class *mp, 1811 const char *name, struct g_raid_md_object *md) 1812 { 1813 struct g_raid_softc *sc; 1814 struct g_geom *gp; 1815 int error; 1816 1817 g_topology_assert(); 1818 G_RAID_DEBUG(1, "Creating array %s.", name); 1819 1820 gp = g_new_geomf(mp, "%s", name); 1821 sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO); 1822 gp->start = g_raid_start; 1823 gp->orphan = g_raid_orphan; 1824 gp->access = g_raid_access; 1825 gp->dumpconf = g_raid_dumpconf; 1826 1827 sc->sc_md = md; 1828 sc->sc_geom = gp; 1829 sc->sc_flags = 0; 1830 TAILQ_INIT(&sc->sc_volumes); 1831 TAILQ_INIT(&sc->sc_disks); 1832 sx_init(&sc->sc_lock, "graid:lock"); 1833 mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF); 1834 TAILQ_INIT(&sc->sc_events); 1835 bioq_init(&sc->sc_queue); 1836 gp->softc = sc; 1837 error = kproc_create(g_raid_worker, sc, &sc->sc_worker, 0, 0, 1838 "g_raid %s", name); 1839 if (error != 0) { 1840 G_RAID_DEBUG(0, "Cannot create kernel thread for %s.", name); 1841 mtx_destroy(&sc->sc_queue_mtx); 1842 sx_destroy(&sc->sc_lock); 1843 g_destroy_geom(sc->sc_geom); 1844 free(sc, M_RAID); 1845 return (NULL); 1846 } 1847 1848 G_RAID_DEBUG1(0, sc, "Array %s created.", name); 1849 return (sc); 1850 } 1851 1852 struct g_raid_volume * 1853 g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id) 1854 { 1855 struct g_raid_volume *vol, *vol1; 1856 int i; 1857 1858 G_RAID_DEBUG1(1, sc, "Creating volume %s.", name); 1859 vol = malloc(sizeof(*vol), M_RAID, M_WAITOK | M_ZERO); 1860 vol->v_softc = sc; 1861 strlcpy(vol->v_name, name, G_RAID_MAX_VOLUMENAME); 1862 vol->v_state = G_RAID_VOLUME_S_STARTING; 1863 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 1864 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN; 1865 vol->v_rotate_parity = 1; 1866 bioq_init(&vol->v_inflight); 1867 bioq_init(&vol->v_locked); 1868 LIST_INIT(&vol->v_locks); 1869 for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) { 1870 vol->v_subdisks[i].sd_softc = sc; 1871 vol->v_subdisks[i].sd_volume = vol; 1872 vol->v_subdisks[i].sd_pos = i; 1873 vol->v_subdisks[i].sd_state = G_RAID_DISK_S_NONE; 1874 } 1875 1876 /* Find free ID for this volume. */ 1877 g_topology_lock(); 1878 vol1 = vol; 1879 if (id >= 0) { 1880 LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { 1881 if (vol1->v_global_id == id) 1882 break; 1883 } 1884 } 1885 if (vol1 != NULL) { 1886 for (id = 0; ; id++) { 1887 LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { 1888 if (vol1->v_global_id == id) 1889 break; 1890 } 1891 if (vol1 == NULL) 1892 break; 1893 } 1894 } 1895 vol->v_global_id = id; 1896 LIST_INSERT_HEAD(&g_raid_volumes, vol, v_global_next); 1897 g_topology_unlock(); 1898 1899 /* Delay root mounting. 
*/ 1900 vol->v_rootmount = root_mount_hold("GRAID"); 1901 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", vol->v_rootmount); 1902 vol->v_starting = 1; 1903 TAILQ_INSERT_TAIL(&sc->sc_volumes, vol, v_next); 1904 return (vol); 1905 } 1906 1907 struct g_raid_disk * 1908 g_raid_create_disk(struct g_raid_softc *sc) 1909 { 1910 struct g_raid_disk *disk; 1911 1912 G_RAID_DEBUG1(1, sc, "Creating disk."); 1913 disk = malloc(sizeof(*disk), M_RAID, M_WAITOK | M_ZERO); 1914 disk->d_softc = sc; 1915 disk->d_state = G_RAID_DISK_S_NONE; 1916 TAILQ_INIT(&disk->d_subdisks); 1917 TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next); 1918 return (disk); 1919 } 1920 1921 int g_raid_start_volume(struct g_raid_volume *vol) 1922 { 1923 struct g_raid_tr_class *class; 1924 struct g_raid_tr_object *obj; 1925 int status; 1926 1927 G_RAID_DEBUG1(2, vol->v_softc, "Starting volume %s.", vol->v_name); 1928 LIST_FOREACH(class, &g_raid_tr_classes, trc_list) { 1929 if (!class->trc_enable) 1930 continue; 1931 G_RAID_DEBUG1(2, vol->v_softc, 1932 "Tasting volume %s for %s transformation.", 1933 vol->v_name, class->name); 1934 obj = (void *)kobj_create((kobj_class_t)class, M_RAID, 1935 M_WAITOK); 1936 obj->tro_class = class; 1937 obj->tro_volume = vol; 1938 status = G_RAID_TR_TASTE(obj, vol); 1939 if (status != G_RAID_TR_TASTE_FAIL) 1940 break; 1941 kobj_delete((kobj_t)obj, M_RAID); 1942 } 1943 if (class == NULL) { 1944 G_RAID_DEBUG1(0, vol->v_softc, 1945 "No transformation module found for %s.", 1946 vol->v_name); 1947 vol->v_tr = NULL; 1948 g_raid_change_volume_state(vol, G_RAID_VOLUME_S_UNSUPPORTED); 1949 g_raid_event_send(vol, G_RAID_VOLUME_E_DOWN, 1950 G_RAID_EVENT_VOLUME); 1951 return (-1); 1952 } 1953 G_RAID_DEBUG1(2, vol->v_softc, 1954 "Transformation module %s chosen for %s.", 1955 class->name, vol->v_name); 1956 vol->v_tr = obj; 1957 return (0); 1958 } 1959 1960 int 1961 g_raid_destroy_node(struct g_raid_softc *sc, int worker) 1962 { 1963 struct g_raid_volume *vol, *tmpv; 1964 struct g_raid_disk *disk, *tmpd; 1965 int error = 0; 1966 1967 sc->sc_stopping = G_RAID_DESTROY_HARD; 1968 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tmpv) { 1969 if (g_raid_destroy_volume(vol)) 1970 error = EBUSY; 1971 } 1972 if (error) 1973 return (error); 1974 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tmpd) { 1975 if (g_raid_destroy_disk(disk)) 1976 error = EBUSY; 1977 } 1978 if (error) 1979 return (error); 1980 if (sc->sc_md) { 1981 G_RAID_MD_FREE(sc->sc_md); 1982 kobj_delete((kobj_t)sc->sc_md, M_RAID); 1983 sc->sc_md = NULL; 1984 } 1985 if (sc->sc_geom != NULL) { 1986 G_RAID_DEBUG1(0, sc, "Array %s destroyed.", sc->sc_name); 1987 g_topology_lock(); 1988 sc->sc_geom->softc = NULL; 1989 g_wither_geom(sc->sc_geom, ENXIO); 1990 g_topology_unlock(); 1991 sc->sc_geom = NULL; 1992 } else 1993 G_RAID_DEBUG(1, "Array destroyed."); 1994 if (worker) { 1995 g_raid_event_cancel(sc, sc); 1996 mtx_destroy(&sc->sc_queue_mtx); 1997 sx_xunlock(&sc->sc_lock); 1998 sx_destroy(&sc->sc_lock); 1999 wakeup(&sc->sc_stopping); 2000 free(sc, M_RAID); 2001 curthread->td_pflags &= ~TDP_GEOM; 2002 G_RAID_DEBUG(1, "Thread exiting."); 2003 kproc_exit(0); 2004 } else { 2005 /* Wake up worker to make it selfdestruct. 
		g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
	}
	return (0);
}

int
g_raid_destroy_volume(struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_disk *disk;
	int i;

	sc = vol->v_softc;
	G_RAID_DEBUG1(2, sc, "Destroying volume %s.", vol->v_name);
	vol->v_stopping = 1;
	if (vol->v_state != G_RAID_VOLUME_S_STOPPED) {
		if (vol->v_tr) {
			G_RAID_TR_STOP(vol->v_tr);
			return (EBUSY);
		} else
			vol->v_state = G_RAID_VOLUME_S_STOPPED;
	}
	if (g_raid_event_check(sc, vol) != 0)
		return (EBUSY);
	if (vol->v_provider != NULL)
		return (EBUSY);
	if (vol->v_provider_open != 0)
		return (EBUSY);
	if (vol->v_tr) {
		G_RAID_TR_FREE(vol->v_tr);
		kobj_delete((kobj_t)vol->v_tr, M_RAID);
		vol->v_tr = NULL;
	}
	if (vol->v_rootmount)
		root_mount_rel(vol->v_rootmount);
	g_topology_lock();
	LIST_REMOVE(vol, v_global_next);
	g_topology_unlock();
	TAILQ_REMOVE(&sc->sc_volumes, vol, v_next);
	for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) {
		g_raid_event_cancel(sc, &vol->v_subdisks[i]);
		disk = vol->v_subdisks[i].sd_disk;
		if (disk == NULL)
			continue;
		TAILQ_REMOVE(&disk->d_subdisks, &vol->v_subdisks[i], sd_next);
	}
	G_RAID_DEBUG1(2, sc, "Volume %s destroyed.", vol->v_name);
	if (sc->sc_md)
		G_RAID_MD_FREE_VOLUME(sc->sc_md, vol);
	g_raid_event_cancel(sc, vol);
	free(vol, M_RAID);
	if (sc->sc_stopping == G_RAID_DESTROY_HARD) {
		/* Wake up the worker to let it self-destruct. */
		g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
	}
	return (0);
}

int
g_raid_destroy_disk(struct g_raid_disk *disk)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd, *tmp;

	sc = disk->d_softc;
	G_RAID_DEBUG1(2, sc, "Destroying disk.");
	if (disk->d_consumer) {
		g_raid_kill_consumer(sc, disk->d_consumer);
		disk->d_consumer = NULL;
	}
	TAILQ_FOREACH_SAFE(sd, &disk->d_subdisks, sd_next, tmp) {
		g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE);
		g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
		    G_RAID_EVENT_SUBDISK);
		TAILQ_REMOVE(&disk->d_subdisks, sd, sd_next);
		sd->sd_disk = NULL;
	}
	TAILQ_REMOVE(&sc->sc_disks, disk, d_next);
	if (sc->sc_md)
		G_RAID_MD_FREE_DISK(sc->sc_md, disk);
	g_raid_event_cancel(sc, disk);
	free(disk, M_RAID);
	return (0);
}

int
g_raid_destroy(struct g_raid_softc *sc, int how)
{
	int opens;

	g_topology_assert_not();
	if (sc == NULL)
		return (ENXIO);
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	/* Count open volumes. */
	opens = g_raid_nopens(sc);

	/* React to volumes that are still open. */
	if (opens > 0) {
		switch (how) {
		case G_RAID_DESTROY_SOFT:
			G_RAID_DEBUG1(1, sc,
			    "%d volumes are still open.",
			    opens);
			return (EBUSY);
		case G_RAID_DESTROY_DELAYED:
			G_RAID_DEBUG1(1, sc,
			    "Array will be destroyed on last close.");
			sc->sc_stopping = G_RAID_DESTROY_DELAYED;
			return (EBUSY);
		case G_RAID_DESTROY_HARD:
			G_RAID_DEBUG1(1, sc,
			    "%d volumes are still open.",
			    opens);
		}
	}

	/* Mark node for destruction. */
	sc->sc_stopping = G_RAID_DESTROY_HARD;
	/* Wake up the worker to let it self-destruct. */
	g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0);
	/* Sleep until the node is destroyed. */
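	/*
	 * PDROP releases sc_lock on return instead of reacquiring it;
	 * the wakeup comes from g_raid_destroy_node() just before it
	 * frees the softc, so sc must not be touched after this point.
	 */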
	sx_sleep(&sc->sc_stopping, &sc->sc_lock,
	    PRIBIO | PDROP, "r:destroy", 0);
	return (0);
}

static void
g_raid_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

static struct g_geom *
g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_consumer *cp;
	struct g_geom *gp, *geom;
	struct g_raid_md_class *class;
	struct g_raid_md_object *obj;
	int status;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	if (!g_raid_enable)
		return (NULL);
	G_RAID_DEBUG(2, "Tasting provider %s.", pp->name);

	gp = g_new_geomf(mp, "raid:taste");
	/*
	 * This orphan function should never be called.
	 */
	gp->orphan = g_raid_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);

	geom = NULL;
	LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
		if (!class->mdc_enable)
			continue;
		G_RAID_DEBUG(2, "Tasting provider %s for %s metadata.",
		    pp->name, class->name);
		obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
		    M_WAITOK);
		obj->mdo_class = class;
		status = G_RAID_MD_TASTE(obj, mp, cp, &geom);
		if (status != G_RAID_MD_TASTE_NEW)
			kobj_delete((kobj_t)obj, M_RAID);
		if (status != G_RAID_MD_TASTE_FAIL)
			break;
	}

	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	G_RAID_DEBUG(2, "Tasting provider %s done.", pp->name);
	return (geom);
}

int
g_raid_create_node_format(const char *format, struct gctl_req *req,
    struct g_geom **gp)
{
	struct g_raid_md_class *class;
	struct g_raid_md_object *obj;
	int status;

	G_RAID_DEBUG(2, "Creating array for %s metadata.", format);
	LIST_FOREACH(class, &g_raid_md_classes, mdc_list) {
		if (strcasecmp(class->name, format) == 0)
			break;
	}
	if (class == NULL) {
		G_RAID_DEBUG(1, "No support for %s metadata.", format);
		return (G_RAID_MD_TASTE_FAIL);
	}
	obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
	    M_WAITOK);
	obj->mdo_class = class;
	status = G_RAID_MD_CREATE_REQ(obj, &g_raid_class, req, gp);
	if (status != G_RAID_MD_TASTE_NEW)
		kobj_delete((kobj_t)obj, M_RAID);
	return (status);
}

static int
g_raid_destroy_geom(struct gctl_req *req __unused,
    struct g_class *mp __unused, struct g_geom *gp)
{
	struct g_raid_softc *sc;
	int error;

	g_topology_unlock();
	sc = gp->softc;
	sx_xlock(&sc->sc_lock);
	g_cancel_event(sc);
	error = g_raid_destroy(gp->softc, G_RAID_DESTROY_SOFT);
	if (error != 0)
		sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}

void
g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk)
{

	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
		return;
	if (sc->sc_md)
		G_RAID_MD_WRITE(sc->sc_md, vol, sd, disk);
}

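/*
 * Forward a "disk failed" request to the metadata module, which decides
 * how the array should react.  The disk may be named either directly or
 * via one of its subdisks; requests for absent or non-ACTIVE disks are
 * ignored with a warning.
 */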
void
g_raid_fail_disk(struct g_raid_softc *sc,
    struct g_raid_subdisk *sd, struct g_raid_disk *disk)
{

	if (disk == NULL)
		disk = sd->sd_disk;
	if (disk == NULL) {
		G_RAID_DEBUG1(0, sc,
		    "Warning! Fail request to an absent disk!");
		return;
	}
	if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
		G_RAID_DEBUG1(0, sc, "Warning! Fail request to a disk in a "
		    "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
		return;
	}
	if (sc->sc_md)
		G_RAID_MD_FAIL_DISK(sc->sc_md, sd, disk);
}

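/*
 * Dump XML configuration fragments for the GEOM configuration tree:
 * per-volume details when called for a provider, per-disk details when
 * called for a consumer, and a node-wide summary otherwise.  Called with
 * the topology lock held; it is dropped temporarily to take the softc lock.
 */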
static void
g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	int i, s;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	if (pp != NULL) {
		vol = pp->private;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<Label>%s</Label>\n", indent,
		    vol->v_name);
		sbuf_printf(sb, "%s<RAIDLevel>%s</RAIDLevel>\n", indent,
		    g_raid_volume_level2str(vol->v_raid_level,
		    vol->v_raid_level_qualifier));
		sbuf_printf(sb,
		    "%s<Transformation>%s</Transformation>\n", indent,
		    vol->v_tr ? vol->v_tr->tro_class->name : "NONE");
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    vol->v_disks_count);
		sbuf_printf(sb, "%s<Strip>%u</Strip>\n", indent,
		    vol->v_strip_size);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_raid_volume_state2str(vol->v_state));
		sbuf_printf(sb, "%s<Dirty>%s</Dirty>\n", indent,
		    vol->v_dirty ? "Yes" : "No");
		sbuf_printf(sb, "%s<Subdisks>", indent);
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			if (sd->sd_disk != NULL &&
			    sd->sd_disk->d_consumer != NULL) {
				sbuf_printf(sb, "%s ",
				    g_raid_get_diskname(sd->sd_disk));
			} else {
				sbuf_printf(sb, "NONE ");
			}
			sbuf_printf(sb, "(%s",
			    g_raid_subdisk_state2str(sd->sd_state));
			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
				sbuf_printf(sb, " %d%%",
				    (int)(sd->sd_rebuild_pos * 100 /
				    sd->sd_size));
			}
			sbuf_printf(sb, ")");
			if (i + 1 < vol->v_disks_count)
				sbuf_printf(sb, ", ");
		}
		sbuf_printf(sb, "</Subdisks>\n");
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	} else if (cp != NULL) {
		disk = cp->private;
		if (disk == NULL)
			return;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<State>%s", indent,
		    g_raid_disk_state2str(disk->d_state));
		if (!TAILQ_EMPTY(&disk->d_subdisks)) {
			sbuf_printf(sb, " (");
			TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
				sbuf_printf(sb, "%s",
				    g_raid_subdisk_state2str(sd->sd_state));
				if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
				    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
					sbuf_printf(sb, " %d%%",
					    (int)(sd->sd_rebuild_pos * 100 /
					    sd->sd_size));
				}
				if (TAILQ_NEXT(sd, sd_next))
					sbuf_printf(sb, ", ");
			}
			sbuf_printf(sb, ")");
		}
		sbuf_printf(sb, "</State>\n");
		sbuf_printf(sb, "%s<Subdisks>", indent);
		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
			sbuf_printf(sb, "r%d(%s):%d@%ju",
			    sd->sd_volume->v_global_id,
			    sd->sd_volume->v_name,
			    sd->sd_pos, sd->sd_offset);
			if (TAILQ_NEXT(sd, sd_next))
				sbuf_printf(sb, ", ");
		}
		sbuf_printf(sb, "</Subdisks>\n");
		sbuf_printf(sb, "%s<ReadErrors>%d</ReadErrors>\n", indent,
		    disk->d_read_errs);
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	} else {
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		if (sc->sc_md) {
			sbuf_printf(sb, "%s<Metadata>%s</Metadata>\n", indent,
			    sc->sc_md->mdo_class->name);
		}
		if (!TAILQ_EMPTY(&sc->sc_volumes)) {
			s = 0xff;
			TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
				if (vol->v_state < s)
					s = vol->v_state;
			}
			sbuf_printf(sb, "%s<State>%s</State>\n", indent,
			    g_raid_volume_state2str(s));
		}
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
}

static void
g_raid_shutdown_pre_sync(void *arg, int howto)
{
	struct g_class *mp;
	struct g_geom *gp, *gp2;
	struct g_raid_softc *sc;
	int error;

	mp = arg;
	DROP_GIANT();
	g_topology_lock();
	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
		if ((sc = gp->softc) == NULL)
			continue;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		g_cancel_event(sc);
		error = g_raid_destroy(sc, G_RAID_DESTROY_DELAYED);
		if (error != 0)
			sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
	g_topology_unlock();
	PICKUP_GIANT();
}

static void
g_raid_init(struct g_class *mp)
{

	g_raid_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    g_raid_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
	if (g_raid_pre_sync == NULL)
		G_RAID_DEBUG(0, "Warning! Cannot register shutdown event.");
	g_raid_started = 1;
}

static void
g_raid_fini(struct g_class *mp)
{

	if (g_raid_pre_sync != NULL)
		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid_pre_sync);
	g_raid_started = 0;
}

int
g_raid_md_modevent(module_t mod, int type, void *arg)
{
	struct g_raid_md_class *class, *c, *nc;
	int error;

	error = 0;
	class = arg;
	switch (type) {
	case MOD_LOAD:
		c = LIST_FIRST(&g_raid_md_classes);
		if (c == NULL || c->mdc_priority > class->mdc_priority)
			LIST_INSERT_HEAD(&g_raid_md_classes, class, mdc_list);
		else {
			while ((nc = LIST_NEXT(c, mdc_list)) != NULL &&
			    nc->mdc_priority < class->mdc_priority)
				c = nc;
			LIST_INSERT_AFTER(c, class, mdc_list);
		}
		if (g_raid_started)
			g_retaste(&g_raid_class);
		break;
	case MOD_UNLOAD:
		LIST_REMOVE(class, mdc_list);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

int
g_raid_tr_modevent(module_t mod, int type, void *arg)
{
	struct g_raid_tr_class *class, *c, *nc;
	int error;

	error = 0;
	class = arg;
	switch (type) {
	case MOD_LOAD:
		c = LIST_FIRST(&g_raid_tr_classes);
		if (c == NULL || c->trc_priority > class->trc_priority)
			LIST_INSERT_HEAD(&g_raid_tr_classes, class, trc_list);
		else {
			while ((nc = LIST_NEXT(c, trc_list)) != NULL &&
			    nc->trc_priority < class->trc_priority)
				c = nc;
			LIST_INSERT_AFTER(c, class, trc_list);
		}
		break;
	case MOD_UNLOAD:
		LIST_REMOVE(class, trc_list);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

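/*
 * Illustrative sketch (not part of this driver): g_raid_md_modevent() and
 * g_raid_tr_modevent() above are meant to serve as the module event
 * handlers of metadata/transformation submodules, which pass their kobj
 * class as the moduledata argument.  A hypothetical transformation
 * submodule "foo" with class g_raid_tr_foo_class could hook in roughly
 * like this (the names and the SI_ORDER value are placeholders, not
 * existing code):
 *
 *	static moduledata_t g_raid_tr_foo_mod = {
 *		"g_raid_tr_foo",
 *		g_raid_tr_modevent,
 *		&g_raid_tr_foo_class
 *	};
 *	DECLARE_MODULE(g_raid_tr_foo, g_raid_tr_foo_mod, SI_SUB_DRIVERS,
 *	    SI_ORDER_SECOND);
 *	MODULE_DEPEND(g_raid_tr_foo, geom_raid, 0, 0, 0);
 *
 * Ordering the submodule ahead of the SI_ORDER_THIRD used below is what
 * the comment below refers to: submodules get to register their classes
 * before the core class is announced to GEOM.
 */
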
2494 */ 2495 static moduledata_t g_raid_mod = { 2496 "g_raid", 2497 g_modevent, 2498 &g_raid_class 2499 }; 2500 DECLARE_MODULE(g_raid, g_raid_mod, SI_SUB_DRIVERS, SI_ORDER_THIRD); 2501 MODULE_VERSION(geom_raid, 0); 2502