1*89b17223SAlexander Motin /*- 2*89b17223SAlexander Motin * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3*89b17223SAlexander Motin * All rights reserved. 4*89b17223SAlexander Motin * 5*89b17223SAlexander Motin * Redistribution and use in source and binary forms, with or without 6*89b17223SAlexander Motin * modification, are permitted provided that the following conditions 7*89b17223SAlexander Motin * are met: 8*89b17223SAlexander Motin * 1. Redistributions of source code must retain the above copyright 9*89b17223SAlexander Motin * notice, this list of conditions and the following disclaimer. 10*89b17223SAlexander Motin * 2. Redistributions in binary form must reproduce the above copyright 11*89b17223SAlexander Motin * notice, this list of conditions and the following disclaimer in the 12*89b17223SAlexander Motin * documentation and/or other materials provided with the distribution. 13*89b17223SAlexander Motin * 14*89b17223SAlexander Motin * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15*89b17223SAlexander Motin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16*89b17223SAlexander Motin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17*89b17223SAlexander Motin * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18*89b17223SAlexander Motin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19*89b17223SAlexander Motin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20*89b17223SAlexander Motin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21*89b17223SAlexander Motin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22*89b17223SAlexander Motin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23*89b17223SAlexander Motin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24*89b17223SAlexander Motin * SUCH DAMAGE. 25*89b17223SAlexander Motin */ 26*89b17223SAlexander Motin 27*89b17223SAlexander Motin #include <sys/cdefs.h> 28*89b17223SAlexander Motin __FBSDID("$FreeBSD$"); 29*89b17223SAlexander Motin 30*89b17223SAlexander Motin #include <sys/param.h> 31*89b17223SAlexander Motin #include <sys/bio.h> 32*89b17223SAlexander Motin #include <sys/endian.h> 33*89b17223SAlexander Motin #include <sys/kernel.h> 34*89b17223SAlexander Motin #include <sys/kobj.h> 35*89b17223SAlexander Motin #include <sys/lock.h> 36*89b17223SAlexander Motin #include <sys/malloc.h> 37*89b17223SAlexander Motin #include <sys/mutex.h> 38*89b17223SAlexander Motin #include <sys/systm.h> 39*89b17223SAlexander Motin #include <geom/geom.h> 40*89b17223SAlexander Motin #include "geom/raid/g_raid.h" 41*89b17223SAlexander Motin #include "g_raid_tr_if.h" 42*89b17223SAlexander Motin 43*89b17223SAlexander Motin static MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); 44*89b17223SAlexander Motin 45*89b17223SAlexander Motin struct g_raid_tr_concat_object { 46*89b17223SAlexander Motin struct g_raid_tr_object trso_base; 47*89b17223SAlexander Motin int trso_starting; 48*89b17223SAlexander Motin int trso_stopped; 49*89b17223SAlexander Motin }; 50*89b17223SAlexander Motin 51*89b17223SAlexander Motin static g_raid_tr_taste_t g_raid_tr_taste_concat; 52*89b17223SAlexander Motin static g_raid_tr_event_t g_raid_tr_event_concat; 53*89b17223SAlexander Motin static g_raid_tr_start_t g_raid_tr_start_concat; 54*89b17223SAlexander Motin static g_raid_tr_stop_t g_raid_tr_stop_concat; 55*89b17223SAlexander Motin static g_raid_tr_iostart_t g_raid_tr_iostart_concat; 56*89b17223SAlexander Motin static g_raid_tr_iodone_t g_raid_tr_iodone_concat; 57*89b17223SAlexander Motin static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; 58*89b17223SAlexander Motin static g_raid_tr_free_t g_raid_tr_free_concat; 59*89b17223SAlexander Motin 60*89b17223SAlexander Motin static kobj_method_t g_raid_tr_concat_methods[] = { 61*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), 62*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), 63*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), 64*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), 65*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), 66*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), 67*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), 68*89b17223SAlexander Motin KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_concat), 69*89b17223SAlexander Motin { 0, 0 } 70*89b17223SAlexander Motin }; 71*89b17223SAlexander Motin 72*89b17223SAlexander Motin static struct g_raid_tr_class g_raid_tr_concat_class = { 73*89b17223SAlexander Motin "CONCAT", 74*89b17223SAlexander Motin g_raid_tr_concat_methods, 75*89b17223SAlexander Motin sizeof(struct g_raid_tr_concat_object), 76*89b17223SAlexander Motin .trc_priority = 50 77*89b17223SAlexander Motin }; 78*89b17223SAlexander Motin 79*89b17223SAlexander Motin static int 80*89b17223SAlexander Motin g_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) 81*89b17223SAlexander Motin { 82*89b17223SAlexander Motin struct g_raid_tr_concat_object *trs; 83*89b17223SAlexander Motin 84*89b17223SAlexander Motin trs = (struct g_raid_tr_concat_object *)tr; 85*89b17223SAlexander Motin if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && 86*89b17223SAlexander Motin tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 87*89b17223SAlexander Motin !(tr->tro_volume->v_disks_count == 1 && 88*89b17223SAlexander Motin tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) 89*89b17223SAlexander Motin return (G_RAID_TR_TASTE_FAIL); 90*89b17223SAlexander Motin trs->trso_starting = 1; 91*89b17223SAlexander Motin return (G_RAID_TR_TASTE_SUCCEED); 92*89b17223SAlexander Motin } 93*89b17223SAlexander Motin 94*89b17223SAlexander Motin static int 95*89b17223SAlexander Motin g_raid_tr_update_state_concat(struct g_raid_volume *vol) 96*89b17223SAlexander Motin { 97*89b17223SAlexander Motin struct g_raid_tr_concat_object *trs; 98*89b17223SAlexander Motin struct g_raid_softc *sc; 99*89b17223SAlexander Motin off_t size; 100*89b17223SAlexander Motin u_int s; 101*89b17223SAlexander Motin int i, n, f; 102*89b17223SAlexander Motin 103*89b17223SAlexander Motin sc = vol->v_softc; 104*89b17223SAlexander Motin trs = (struct g_raid_tr_concat_object *)vol->v_tr; 105*89b17223SAlexander Motin if (trs->trso_stopped) 106*89b17223SAlexander Motin s = G_RAID_VOLUME_S_STOPPED; 107*89b17223SAlexander Motin else if (trs->trso_starting) 108*89b17223SAlexander Motin s = G_RAID_VOLUME_S_STARTING; 109*89b17223SAlexander Motin else { 110*89b17223SAlexander Motin n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); 111*89b17223SAlexander Motin f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); 112*89b17223SAlexander Motin if (n + f == vol->v_disks_count) { 113*89b17223SAlexander Motin if (f == 0) 114*89b17223SAlexander Motin s = G_RAID_VOLUME_S_OPTIMAL; 115*89b17223SAlexander Motin else 116*89b17223SAlexander Motin s = G_RAID_VOLUME_S_SUBOPTIMAL; 117*89b17223SAlexander Motin } else 118*89b17223SAlexander Motin s = G_RAID_VOLUME_S_BROKEN; 119*89b17223SAlexander Motin } 120*89b17223SAlexander Motin if (s != vol->v_state) { 121*89b17223SAlexander Motin 122*89b17223SAlexander Motin /* 123*89b17223SAlexander Motin * Some metadata modules may not know CONCAT volume 124*89b17223SAlexander Motin * mediasize until all disks connected. Recalculate. 125*89b17223SAlexander Motin */ 126*89b17223SAlexander Motin if (G_RAID_VOLUME_S_ALIVE(s) && 127*89b17223SAlexander Motin !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { 128*89b17223SAlexander Motin size = 0; 129*89b17223SAlexander Motin for (i = 0; i < vol->v_disks_count; i++) { 130*89b17223SAlexander Motin if (vol->v_subdisks[i].sd_state != 131*89b17223SAlexander Motin G_RAID_SUBDISK_S_NONE) 132*89b17223SAlexander Motin size += vol->v_subdisks[i].sd_size; 133*89b17223SAlexander Motin } 134*89b17223SAlexander Motin vol->v_mediasize = size; 135*89b17223SAlexander Motin } 136*89b17223SAlexander Motin 137*89b17223SAlexander Motin g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 138*89b17223SAlexander Motin G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, 139*89b17223SAlexander Motin G_RAID_EVENT_VOLUME); 140*89b17223SAlexander Motin g_raid_change_volume_state(vol, s); 141*89b17223SAlexander Motin if (!trs->trso_starting && !trs->trso_stopped) 142*89b17223SAlexander Motin g_raid_write_metadata(sc, vol, NULL, NULL); 143*89b17223SAlexander Motin } 144*89b17223SAlexander Motin return (0); 145*89b17223SAlexander Motin } 146*89b17223SAlexander Motin 147*89b17223SAlexander Motin static int 148*89b17223SAlexander Motin g_raid_tr_event_concat(struct g_raid_tr_object *tr, 149*89b17223SAlexander Motin struct g_raid_subdisk *sd, u_int event) 150*89b17223SAlexander Motin { 151*89b17223SAlexander Motin struct g_raid_tr_concat_object *trs; 152*89b17223SAlexander Motin struct g_raid_softc *sc; 153*89b17223SAlexander Motin struct g_raid_volume *vol; 154*89b17223SAlexander Motin int state; 155*89b17223SAlexander Motin 156*89b17223SAlexander Motin trs = (struct g_raid_tr_concat_object *)tr; 157*89b17223SAlexander Motin vol = tr->tro_volume; 158*89b17223SAlexander Motin sc = vol->v_softc; 159*89b17223SAlexander Motin 160*89b17223SAlexander Motin state = sd->sd_state; 161*89b17223SAlexander Motin if (state != G_RAID_SUBDISK_S_NONE && 162*89b17223SAlexander Motin state != G_RAID_SUBDISK_S_FAILED && 163*89b17223SAlexander Motin state != G_RAID_SUBDISK_S_ACTIVE) { 164*89b17223SAlexander Motin G_RAID_DEBUG1(1, sc, 165*89b17223SAlexander Motin "Promote subdisk %s:%d from %s to ACTIVE.", 166*89b17223SAlexander Motin vol->v_name, sd->sd_pos, 167*89b17223SAlexander Motin g_raid_subdisk_state2str(sd->sd_state)); 168*89b17223SAlexander Motin g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); 169*89b17223SAlexander Motin } 170*89b17223SAlexander Motin if (state != sd->sd_state && 171*89b17223SAlexander Motin !trs->trso_starting && !trs->trso_stopped) 172*89b17223SAlexander Motin g_raid_write_metadata(sc, vol, sd, NULL); 173*89b17223SAlexander Motin g_raid_tr_update_state_concat(vol); 174*89b17223SAlexander Motin return (0); 175*89b17223SAlexander Motin } 176*89b17223SAlexander Motin 177*89b17223SAlexander Motin static int 178*89b17223SAlexander Motin g_raid_tr_start_concat(struct g_raid_tr_object *tr) 179*89b17223SAlexander Motin { 180*89b17223SAlexander Motin struct g_raid_tr_concat_object *trs; 181*89b17223SAlexander Motin struct g_raid_volume *vol; 182*89b17223SAlexander Motin 183*89b17223SAlexander Motin trs = (struct g_raid_tr_concat_object *)tr; 184*89b17223SAlexander Motin vol = tr->tro_volume; 185*89b17223SAlexander Motin trs->trso_starting = 0; 186*89b17223SAlexander Motin g_raid_tr_update_state_concat(vol); 187*89b17223SAlexander Motin return (0); 188*89b17223SAlexander Motin } 189*89b17223SAlexander Motin 190*89b17223SAlexander Motin static int 191*89b17223SAlexander Motin g_raid_tr_stop_concat(struct g_raid_tr_object *tr) 192*89b17223SAlexander Motin { 193*89b17223SAlexander Motin struct g_raid_tr_concat_object *trs; 194*89b17223SAlexander Motin struct g_raid_volume *vol; 195*89b17223SAlexander Motin 196*89b17223SAlexander Motin trs = (struct g_raid_tr_concat_object *)tr; 197*89b17223SAlexander Motin vol = tr->tro_volume; 198*89b17223SAlexander Motin trs->trso_starting = 0; 199*89b17223SAlexander Motin trs->trso_stopped = 1; 200*89b17223SAlexander Motin g_raid_tr_update_state_concat(vol); 201*89b17223SAlexander Motin return (0); 202*89b17223SAlexander Motin } 203*89b17223SAlexander Motin 204*89b17223SAlexander Motin static void 205*89b17223SAlexander Motin g_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) 206*89b17223SAlexander Motin { 207*89b17223SAlexander Motin struct g_raid_volume *vol; 208*89b17223SAlexander Motin struct g_raid_subdisk *sd; 209*89b17223SAlexander Motin struct bio_queue_head queue; 210*89b17223SAlexander Motin struct bio *cbp; 211*89b17223SAlexander Motin char *addr; 212*89b17223SAlexander Motin off_t offset, length, remain; 213*89b17223SAlexander Motin u_int no; 214*89b17223SAlexander Motin 215*89b17223SAlexander Motin vol = tr->tro_volume; 216*89b17223SAlexander Motin if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && 217*89b17223SAlexander Motin vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { 218*89b17223SAlexander Motin g_raid_iodone(bp, EIO); 219*89b17223SAlexander Motin return; 220*89b17223SAlexander Motin } 221*89b17223SAlexander Motin if (bp->bio_cmd == BIO_FLUSH) { 222*89b17223SAlexander Motin g_raid_tr_flush_common(tr, bp); 223*89b17223SAlexander Motin return; 224*89b17223SAlexander Motin } 225*89b17223SAlexander Motin 226*89b17223SAlexander Motin offset = bp->bio_offset; 227*89b17223SAlexander Motin remain = bp->bio_length; 228*89b17223SAlexander Motin addr = bp->bio_data; 229*89b17223SAlexander Motin no = 0; 230*89b17223SAlexander Motin while (no < vol->v_disks_count && 231*89b17223SAlexander Motin offset >= vol->v_subdisks[no].sd_size) { 232*89b17223SAlexander Motin offset -= vol->v_subdisks[no].sd_size; 233*89b17223SAlexander Motin no++; 234*89b17223SAlexander Motin } 235*89b17223SAlexander Motin KASSERT(no < vol->v_disks_count, 236*89b17223SAlexander Motin ("Request starts after volume end (%ju)", bp->bio_offset)); 237*89b17223SAlexander Motin bioq_init(&queue); 238*89b17223SAlexander Motin do { 239*89b17223SAlexander Motin sd = &vol->v_subdisks[no]; 240*89b17223SAlexander Motin length = MIN(sd->sd_size - offset, remain); 241*89b17223SAlexander Motin cbp = g_clone_bio(bp); 242*89b17223SAlexander Motin if (cbp == NULL) 243*89b17223SAlexander Motin goto failure; 244*89b17223SAlexander Motin cbp->bio_offset = offset; 245*89b17223SAlexander Motin cbp->bio_data = addr; 246*89b17223SAlexander Motin cbp->bio_length = length; 247*89b17223SAlexander Motin cbp->bio_caller1 = sd; 248*89b17223SAlexander Motin bioq_insert_tail(&queue, cbp); 249*89b17223SAlexander Motin remain -= length; 250*89b17223SAlexander Motin addr += length; 251*89b17223SAlexander Motin offset = 0; 252*89b17223SAlexander Motin no++; 253*89b17223SAlexander Motin KASSERT(no < vol->v_disks_count || remain == 0, 254*89b17223SAlexander Motin ("Request ends after volume end (%ju, %ju)", 255*89b17223SAlexander Motin bp->bio_offset, bp->bio_length)); 256*89b17223SAlexander Motin } while (remain > 0); 257*89b17223SAlexander Motin for (cbp = bioq_first(&queue); cbp != NULL; 258*89b17223SAlexander Motin cbp = bioq_first(&queue)) { 259*89b17223SAlexander Motin bioq_remove(&queue, cbp); 260*89b17223SAlexander Motin sd = cbp->bio_caller1; 261*89b17223SAlexander Motin cbp->bio_caller1 = NULL; 262*89b17223SAlexander Motin g_raid_subdisk_iostart(sd, cbp); 263*89b17223SAlexander Motin } 264*89b17223SAlexander Motin return; 265*89b17223SAlexander Motin failure: 266*89b17223SAlexander Motin for (cbp = bioq_first(&queue); cbp != NULL; 267*89b17223SAlexander Motin cbp = bioq_first(&queue)) { 268*89b17223SAlexander Motin bioq_remove(&queue, cbp); 269*89b17223SAlexander Motin g_destroy_bio(cbp); 270*89b17223SAlexander Motin } 271*89b17223SAlexander Motin if (bp->bio_error == 0) 272*89b17223SAlexander Motin bp->bio_error = ENOMEM; 273*89b17223SAlexander Motin g_raid_iodone(bp, bp->bio_error); 274*89b17223SAlexander Motin } 275*89b17223SAlexander Motin 276*89b17223SAlexander Motin static int 277*89b17223SAlexander Motin g_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, 278*89b17223SAlexander Motin void *virtual, vm_offset_t physical, off_t boffset, size_t blength) 279*89b17223SAlexander Motin { 280*89b17223SAlexander Motin struct g_raid_volume *vol; 281*89b17223SAlexander Motin struct g_raid_subdisk *sd; 282*89b17223SAlexander Motin char *addr; 283*89b17223SAlexander Motin off_t offset, length, remain; 284*89b17223SAlexander Motin int error, no; 285*89b17223SAlexander Motin 286*89b17223SAlexander Motin vol = tr->tro_volume; 287*89b17223SAlexander Motin if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 288*89b17223SAlexander Motin return (ENXIO); 289*89b17223SAlexander Motin 290*89b17223SAlexander Motin offset = boffset; 291*89b17223SAlexander Motin remain = blength; 292*89b17223SAlexander Motin addr = virtual; 293*89b17223SAlexander Motin no = 0; 294*89b17223SAlexander Motin while (no < vol->v_disks_count && 295*89b17223SAlexander Motin offset >= vol->v_subdisks[no].sd_size) { 296*89b17223SAlexander Motin offset -= vol->v_subdisks[no].sd_size; 297*89b17223SAlexander Motin no++; 298*89b17223SAlexander Motin } 299*89b17223SAlexander Motin KASSERT(no < vol->v_disks_count, 300*89b17223SAlexander Motin ("Request starts after volume end (%ju)", boffset)); 301*89b17223SAlexander Motin do { 302*89b17223SAlexander Motin sd = &vol->v_subdisks[no]; 303*89b17223SAlexander Motin length = MIN(sd->sd_size - offset, remain); 304*89b17223SAlexander Motin error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], 305*89b17223SAlexander Motin addr, 0, offset, length); 306*89b17223SAlexander Motin if (error != 0) 307*89b17223SAlexander Motin return (error); 308*89b17223SAlexander Motin remain -= length; 309*89b17223SAlexander Motin addr += length; 310*89b17223SAlexander Motin offset = 0; 311*89b17223SAlexander Motin no++; 312*89b17223SAlexander Motin KASSERT(no < vol->v_disks_count || remain == 0, 313*89b17223SAlexander Motin ("Request ends after volume end (%ju, %zu)", 314*89b17223SAlexander Motin boffset, blength)); 315*89b17223SAlexander Motin } while (remain > 0); 316*89b17223SAlexander Motin return (0); 317*89b17223SAlexander Motin } 318*89b17223SAlexander Motin 319*89b17223SAlexander Motin static void 320*89b17223SAlexander Motin g_raid_tr_iodone_concat(struct g_raid_tr_object *tr, 321*89b17223SAlexander Motin struct g_raid_subdisk *sd,struct bio *bp) 322*89b17223SAlexander Motin { 323*89b17223SAlexander Motin struct bio *pbp; 324*89b17223SAlexander Motin 325*89b17223SAlexander Motin pbp = bp->bio_parent; 326*89b17223SAlexander Motin if (pbp->bio_error == 0) 327*89b17223SAlexander Motin pbp->bio_error = bp->bio_error; 328*89b17223SAlexander Motin g_destroy_bio(bp); 329*89b17223SAlexander Motin pbp->bio_inbed++; 330*89b17223SAlexander Motin if (pbp->bio_children == pbp->bio_inbed) { 331*89b17223SAlexander Motin pbp->bio_completed = pbp->bio_length; 332*89b17223SAlexander Motin g_raid_iodone(pbp, bp->bio_error); 333*89b17223SAlexander Motin } 334*89b17223SAlexander Motin } 335*89b17223SAlexander Motin 336*89b17223SAlexander Motin static int 337*89b17223SAlexander Motin g_raid_tr_free_concat(struct g_raid_tr_object *tr) 338*89b17223SAlexander Motin { 339*89b17223SAlexander Motin 340*89b17223SAlexander Motin return (0); 341*89b17223SAlexander Motin } 342*89b17223SAlexander Motin 343*89b17223SAlexander Motin G_RAID_TR_DECLARE(g_raid_tr_concat); 344