1 /*- 2 * Copyright (c) 2004 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/conf.h> 33 #include <sys/kernel.h> 34 #include <sys/kthread.h> 35 #include <sys/libkern.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/module.h> 39 #include <sys/mutex.h> 40 #include <sys/systm.h> 41 42 #include <geom/geom.h> 43 #include <geom/vinum/geom_vinum_var.h> 44 #include <geom/vinum/geom_vinum.h> 45 46 static void gv_vol_completed_request(struct gv_volume *, struct bio *); 47 static void gv_vol_normal_request(struct gv_volume *, struct bio *); 48 49 static void 50 gv_volume_orphan(struct g_consumer *cp) 51 { 52 struct g_geom *gp; 53 struct gv_volume *v; 54 int error; 55 56 g_topology_assert(); 57 gp = cp->geom; 58 g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name); 59 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 60 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 61 error = cp->provider->error; 62 if (error == 0) 63 error = ENXIO; 64 g_detach(cp); 65 g_destroy_consumer(cp); 66 if (!LIST_EMPTY(&gp->consumer)) 67 return; 68 v = gp->softc; 69 if (v != NULL) { 70 gv_kill_vol_thread(v); 71 v->geom = NULL; 72 } 73 gp->softc = NULL; 74 g_wither_geom(gp, error); 75 } 76 77 /* We end up here after the requests to our plexes are done. */ 78 static void 79 gv_volume_done(struct bio *bp) 80 { 81 struct gv_volume *v; 82 83 v = bp->bio_from->geom->softc; 84 bp->bio_cflags |= GV_BIO_DONE; 85 mtx_lock(&v->bqueue_mtx); 86 bioq_insert_tail(v->bqueue, bp); 87 wakeup(v); 88 mtx_unlock(&v->bqueue_mtx); 89 } 90 91 static void 92 gv_volume_start(struct bio *bp) 93 { 94 struct gv_volume *v; 95 96 switch(bp->bio_cmd) { 97 case BIO_READ: 98 case BIO_WRITE: 99 case BIO_DELETE: 100 break; 101 case BIO_GETATTR: 102 default: 103 g_io_deliver(bp, EOPNOTSUPP); 104 return; 105 } 106 107 v = bp->bio_to->geom->softc; 108 if (v->state != GV_VOL_UP) { 109 g_io_deliver(bp, ENXIO); 110 return; 111 } 112 113 mtx_lock(&v->bqueue_mtx); 114 bioq_disksort(v->bqueue, bp); 115 wakeup(v); 116 mtx_unlock(&v->bqueue_mtx); 117 } 118 119 static void 120 gv_vol_worker(void *arg) 121 { 122 struct bio *bp; 123 struct gv_volume *v; 124 125 v = arg; 126 KASSERT(v != NULL, ("NULL v")); 127 mtx_lock(&v->bqueue_mtx); 128 for (;;) { 129 /* We were signaled to exit. */ 130 if (v->flags & GV_VOL_THREAD_DIE) 131 break; 132 133 /* Take the first BIO from our queue. */ 134 bp = bioq_takefirst(v->bqueue); 135 if (bp == NULL) { 136 msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10); 137 continue; 138 } 139 mtx_unlock(&v->bqueue_mtx); 140 141 if (bp->bio_cflags & GV_BIO_DONE) 142 gv_vol_completed_request(v, bp); 143 else 144 gv_vol_normal_request(v, bp); 145 146 mtx_lock(&v->bqueue_mtx); 147 } 148 mtx_unlock(&v->bqueue_mtx); 149 v->flags |= GV_VOL_THREAD_DEAD; 150 wakeup(v); 151 152 kproc_exit(ENXIO); 153 } 154 155 static void 156 gv_vol_completed_request(struct gv_volume *v, struct bio *bp) 157 { 158 struct bio *pbp; 159 struct g_geom *gp; 160 struct g_consumer *cp, *cp2; 161 162 pbp = bp->bio_parent; 163 164 if (pbp->bio_error == 0) 165 pbp->bio_error = bp->bio_error; 166 167 switch (pbp->bio_cmd) { 168 case BIO_READ: 169 if (bp->bio_error == 0) 170 break; 171 172 if (pbp->bio_cflags & GV_BIO_RETRY) 173 break; 174 175 /* Check if we have another plex left. */ 176 cp = bp->bio_from; 177 gp = cp->geom; 178 cp2 = LIST_NEXT(cp, consumer); 179 if (cp2 == NULL) 180 break; 181 182 if (LIST_NEXT(cp2, consumer) == NULL) 183 pbp->bio_cflags |= GV_BIO_RETRY; 184 185 g_destroy_bio(bp); 186 pbp->bio_children--; 187 mtx_lock(&v->bqueue_mtx); 188 bioq_disksort(v->bqueue, pbp); 189 mtx_unlock(&v->bqueue_mtx); 190 return; 191 192 case BIO_WRITE: 193 case BIO_DELETE: 194 /* Remember if this write request succeeded. */ 195 if (bp->bio_error == 0) 196 pbp->bio_cflags |= GV_BIO_SUCCEED; 197 break; 198 } 199 200 /* When the original request is finished, we deliver it. */ 201 pbp->bio_inbed++; 202 if (pbp->bio_inbed == pbp->bio_children) { 203 if (pbp->bio_cflags & GV_BIO_SUCCEED) 204 pbp->bio_error = 0; 205 pbp->bio_completed = bp->bio_length; 206 g_io_deliver(pbp, pbp->bio_error); 207 } 208 209 g_destroy_bio(bp); 210 } 211 212 static void 213 gv_vol_normal_request(struct gv_volume *v, struct bio *bp) 214 { 215 struct bio_queue_head queue; 216 struct g_geom *gp; 217 struct gv_plex *p, *lp; 218 struct bio *cbp; 219 220 gp = v->geom; 221 222 switch (bp->bio_cmd) { 223 case BIO_READ: 224 cbp = g_clone_bio(bp); 225 if (cbp == NULL) { 226 g_io_deliver(bp, ENOMEM); 227 return; 228 } 229 cbp->bio_done = gv_volume_done; 230 /* 231 * Try to find a good plex where we can send the request to. 232 * The plex either has to be up, or it's a degraded RAID5 plex. 233 */ 234 lp = v->last_read_plex; 235 if (lp == NULL) 236 lp = LIST_FIRST(&v->plexes); 237 p = LIST_NEXT(lp, in_volume); 238 do { 239 if (p == NULL) 240 p = LIST_FIRST(&v->plexes); 241 if ((p->state > GV_PLEX_DEGRADED) || 242 (p->state >= GV_PLEX_DEGRADED && 243 p->org == GV_PLEX_RAID5)) 244 break; 245 p = LIST_NEXT(p, in_volume); 246 } while (p != lp); 247 248 if (p == NULL || 249 (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) || 250 (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) { 251 g_destroy_bio(cbp); 252 bp->bio_children--; 253 g_io_deliver(bp, ENXIO); 254 return; 255 } 256 g_io_request(cbp, p->consumer); 257 v->last_read_plex = p; 258 259 break; 260 261 case BIO_WRITE: 262 case BIO_DELETE: 263 bioq_init(&queue); 264 LIST_FOREACH(p, &v->plexes, in_volume) { 265 if (p->state < GV_PLEX_DEGRADED) 266 continue; 267 cbp = g_clone_bio(bp); 268 if (cbp == NULL) { 269 for (cbp = bioq_first(&queue); cbp != NULL; 270 cbp = bioq_first(&queue)) { 271 bioq_remove(&queue, cbp); 272 g_destroy_bio(cbp); 273 } 274 if (bp->bio_error == 0) 275 bp->bio_error = ENOMEM; 276 g_io_deliver(bp, bp->bio_error); 277 return; 278 } 279 bioq_insert_tail(&queue, cbp); 280 cbp->bio_done = gv_volume_done; 281 cbp->bio_caller1 = p->consumer; 282 } 283 /* Fire off all sub-requests. */ 284 for (cbp = bioq_first(&queue); cbp != NULL; 285 cbp = bioq_first(&queue)) { 286 bioq_remove(&queue, cbp); 287 g_io_request(cbp, cbp->bio_caller1); 288 } 289 break; 290 } 291 } 292 293 static int 294 gv_volume_access(struct g_provider *pp, int dr, int dw, int de) 295 { 296 struct g_geom *gp; 297 struct g_consumer *cp, *cp2; 298 int error; 299 300 gp = pp->geom; 301 302 error = ENXIO; 303 LIST_FOREACH(cp, &gp->consumer, consumer) { 304 error = g_access(cp, dr, dw, de); 305 if (error) { 306 LIST_FOREACH(cp2, &gp->consumer, consumer) { 307 if (cp == cp2) 308 break; 309 g_access(cp2, -dr, -dw, -de); 310 } 311 return (error); 312 } 313 } 314 return (error); 315 } 316 317 static struct g_geom * 318 gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 319 { 320 struct g_geom *gp; 321 struct g_provider *pp2; 322 struct g_consumer *cp, *ocp; 323 struct gv_softc *sc; 324 struct gv_volume *v; 325 struct gv_plex *p; 326 int error, first; 327 328 g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name); 329 g_topology_assert(); 330 331 /* First, find the VINUM class and its associated geom. */ 332 gp = find_vinum_geom(); 333 if (gp == NULL) 334 return (NULL); 335 336 sc = gp->softc; 337 KASSERT(sc != NULL, ("gv_volume_taste: NULL sc")); 338 339 gp = pp->geom; 340 341 /* We only want to attach to plexes. */ 342 if (strcmp(gp->class->name, "VINUMPLEX")) 343 return (NULL); 344 345 first = 0; 346 p = gp->softc; 347 348 /* Let's see if the volume this plex wants is already configured. */ 349 v = gv_find_vol(sc, p->volume); 350 if (v == NULL) 351 return (NULL); 352 if (v->geom == NULL) { 353 gp = g_new_geomf(mp, "%s", p->volume); 354 gp->start = gv_volume_start; 355 gp->orphan = gv_volume_orphan; 356 gp->access = gv_volume_access; 357 gp->softc = v; 358 first++; 359 } else 360 gp = v->geom; 361 362 /* Create bio queue, queue mutex, and worker thread, if necessary. */ 363 if (v->bqueue == NULL) { 364 v->bqueue = g_malloc(sizeof(struct bio_queue_head), 365 M_WAITOK | M_ZERO); 366 bioq_init(v->bqueue); 367 } 368 if (mtx_initialized(&v->bqueue_mtx) == 0) 369 mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF); 370 371 if (!(v->flags & GV_VOL_THREAD_ACTIVE)) { 372 kproc_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s", 373 v->name); 374 v->flags |= GV_VOL_THREAD_ACTIVE; 375 } 376 377 /* 378 * Create a new consumer and attach it to the plex geom. Since this 379 * volume might already have a plex attached, we need to adjust the 380 * access counts of the new consumer. 381 */ 382 ocp = LIST_FIRST(&gp->consumer); 383 cp = g_new_consumer(gp); 384 g_attach(cp, pp); 385 if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) { 386 error = g_access(cp, ocp->acr, ocp->acw, ocp->ace); 387 if (error) { 388 printf("GEOM_VINUM: failed g_access %s -> %s; " 389 "errno %d\n", v->name, p->name, error); 390 g_detach(cp); 391 g_destroy_consumer(cp); 392 if (first) 393 g_destroy_geom(gp); 394 return (NULL); 395 } 396 } 397 398 p->consumer = cp; 399 400 if (p->vol_sc != v) { 401 p->vol_sc = v; 402 v->plexcount++; 403 LIST_INSERT_HEAD(&v->plexes, p, in_volume); 404 } 405 406 /* We need to setup a new VINUMVOLUME geom. */ 407 if (first) { 408 pp2 = g_new_providerf(gp, "gvinum/%s", v->name); 409 pp2->mediasize = pp->mediasize; 410 pp2->sectorsize = pp->sectorsize; 411 g_error_provider(pp2, 0); 412 v->size = pp2->mediasize; 413 v->geom = gp; 414 return (gp); 415 } 416 417 return (NULL); 418 } 419 420 static int 421 gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp, 422 struct g_geom *gp) 423 { 424 struct gv_volume *v; 425 426 g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name); 427 g_topology_assert(); 428 429 v = gp->softc; 430 gv_kill_vol_thread(v); 431 g_wither_geom(gp, ENXIO); 432 return (0); 433 } 434 435 #define VINUMVOLUME_CLASS_NAME "VINUMVOLUME" 436 437 static struct g_class g_vinum_volume_class = { 438 .name = VINUMVOLUME_CLASS_NAME, 439 .version = G_VERSION, 440 .taste = gv_volume_taste, 441 .destroy_geom = gv_volume_destroy_geom, 442 }; 443 444 DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume); 445