/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include "mhd_local.h" /* * manipulate set list */ /* * global set list */ static mutex_t mhd_set_mx = DEFAULTMUTEX; static uint_t mhd_nset = 0; static mhd_drive_set_t **mhd_sets = NULL; /* * add drive to set */ void mhd_add_drive_to_set( mhd_drive_set_t *sp, mhd_drive_t *dp ) { mhd_drive_list_t *dlp = &sp->sr_drives; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); assert(DRIVE_IS_IDLE(dp)); /* add to set */ mhd_add_drive(dlp, dp); /* adjust backlink */ dp->dr_sp = sp; } /* * delete drive from set */ void mhd_del_drive_from_set( mhd_drive_t *dp ) { mhd_drive_set_t *sp = dp->dr_sp; mhd_drive_list_t *dlp = &sp->sr_drives; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); assert(DRIVE_IS_IDLE(dp)); /* delete from set */ mhd_del_drive(dlp, dp); /* adjust backlink */ dp->dr_sp = NULL; } /* * find set in list */ static mhd_drive_set_t * mhd_find_set( char *setname ) { uint_t i; /* check lock */ assert(MUTEX_HELD(&mhd_set_mx)); /* look for set */ for (i = 0; (i < mhd_nset); ++i) { mhd_drive_set_t *sp = mhd_sets[i]; if (strcmp(setname, sp->sr_name) == 0) return (sp); } /* not found */ return (NULL); } /* * wait for operation to complete */ static void mhd_wait_set( mhd_drive_set_t *sp, mhd_drive_list_t *dlp, mhd_state_t state ) { /* check lock */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); /* wait for complete */ for (;;) { uint_t cnt = 0; uint_t i; /* kick threads */ for (i = 0; (i < dlp->dl_ndrive); ++i) { mhd_drive_t *dp = dlp->dl_drives[i]; /* IDLE or ERRORED */ if (state == DRIVE_IDLE) { if (DRIVE_IS_IDLE(dp)) continue; } /* operation complete */ else { if (! (dp->dr_state & state)) continue; } /* kick thread */ mhd_cv_broadcast(&dp->dr_cv); ++cnt; } /* if complete, quit */ if (cnt == 0) break; /* wait for something to happen */ (void) mhd_cv_wait(&sp->sr_cv, &sp->sr_mx); } } /* * idle set */ static int mhd_idle_set( mhd_drive_set_t *sp, mhd_drive_list_t *dlp, mhd_error_t *mhep ) { uint_t i; /* check lock */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); /* disarm any failfast */ if (dlp->dl_ndrive >= sp->sr_drives.dl_ndrive) { if (mhd_ff_disarm(sp, mhep) != 0) return (-1); } /* set IDLING */ for (i = 0; (i < dlp->dl_ndrive); ++i) { mhd_drive_t *dp = dlp->dl_drives[i]; if (! DRIVE_IS_IDLE(dp)) { if (mhd_state(dp, DRIVE_IDLING, mhep) != 0) return (-1); } } /* wait for IDLE */ mhd_wait_set(sp, dlp, DRIVE_IDLE); /* return success */ return (0); } /* * create or update new set */ mhd_drive_set_t * mhd_create_set( mhd_set_t *mhsp, mhd_opts_t options, mhd_drive_list_t *dlp, mhd_error_t *mhep ) { char *setname; mhd_drive_set_t *sp; mhd_drive_list_t *sp_dlp; mhd_drive_set_t *null_sp; uint_t i; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); /* get setname */ if (mhsp == NULL) setname = ""; else setname = mhsp->setname; /* find or create set */ if ((sp = mhd_find_set(setname)) == NULL) { /* allocate and initialize set */ sp = Zalloc(sizeof (*sp)); sp->sr_name = Strdup(setname); mhd_mx_init(&sp->sr_mx); mhd_cv_init(&sp->sr_cv); sp->sr_ff = -1; /* append to set list */ ++mhd_nset; mhd_sets = Realloc(mhd_sets, (mhd_nset * sizeof (*mhd_sets))); mhd_sets[mhd_nset - 1] = sp; } sp_dlp = &sp->sr_drives; /* if just grabbing null set, return */ if (mhsp == NULL) return (sp); assert(strcmp(setname, "") != 0); assert(mhep != NULL); /* get null set */ null_sp = mhd_create_set(NULL, 0, NULL, NULL); assert(null_sp != NULL); assert(sp != null_sp); /* grab set lock */ mhd_mx_lock(&sp->sr_mx); /* save options */ if (options & MHD_SERIAL) sp->sr_options |= MHD_SERIAL; else sp->sr_options &= ~MHD_SERIAL; /* move drives no longer in set to null set */ if (! (options & MHD_PARTIAL_SET)) { for (i = 0; (i < sp_dlp->dl_ndrive); /* void */) { mhd_drive_t *dp = sp_dlp->dl_drives[i]; uint_t j; /* check still there */ for (j = 0; (j < mhsp->drives.drives_len); ++j) { mhd_drivename_t mhdp; mhdp = mhsp->drives.drives_val[j]; if (strcmp(dp->dr_rname, mhdp) == 0) break; } if (j < mhsp->drives.drives_len) { ++i; continue; } /* idle the drive */ if (mhd_idle(dp, mhep) != 0) mhd_clrerror(mhep); /* move to null set */ mhd_del_drive_from_set(dp); mhd_mx_unlock(&sp->sr_mx); mhd_mx_lock(&null_sp->sr_mx); mhd_add_drive_to_set(null_sp, dp); mhd_mx_unlock(&null_sp->sr_mx); mhd_mx_lock(&sp->sr_mx); } } /* add new drives to lists */ for (i = 0; (i < mhsp->drives.drives_len); ++i) { mhd_drivename_t mhdp = mhsp->drives.drives_val[i]; uint_t j; mhd_drive_t *dp; /* check already there */ for (j = 0; (j < dlp->dl_ndrive); ++j) { dp = dlp->dl_drives[j]; if (strcmp(mhdp, dp->dr_rname) == 0) break; } if (j < dlp->dl_ndrive) { mhd_add_drive(dlp, dp); continue; } /* add drive to set */ if ((dp = mhd_create_drive(sp, mhdp, NULL, mhep)) == NULL) { mhde_perror(mhep, "mhd_create_drive: %s", mhdp); continue; } mhd_add_drive(dlp, dp); } /* debug */ #ifdef MHD_DEBUG if (mhd_debug > 0) { for (i = 0; (i < mhd_nset); ++i) { mhd_drive_set_t *sp = mhd_sets[i]; mhd_drive_list_t *dlp = &sp->sr_drives; char buf[10240]; uint_t j; (void) snprintf(buf, sizeof (buf), "set '%s':", sp->sr_name); for (j = 0; (j < dlp->dl_ndrive); ++j) { mhd_drive_t *dp = dlp->dl_drives[j]; char *p; if ((p = strrchr(dp->dr_rname, '/')) != NULL) ++p; else p = dp->dr_rname; (void) strncat(buf, " ", sizeof (buf)); (void) strncat(buf, p, sizeof (buf)); } buf[sizeof (buf) - 1] = '\0'; mhd_eprintf("%s\n", buf); } } #endif /* MHD_DEBUG */ /* unlock, return set */ mhd_mx_unlock(&sp->sr_mx); return (sp); } /* * find drive */ mhd_drive_t * mhd_find_drive( char *rname ) { uint_t i; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); /* for each set */ for (i = 0; (i < mhd_nset); ++i) { mhd_drive_set_t *sp = mhd_sets[i]; mhd_drive_list_t *dlp = &sp->sr_drives; uint_t j; /* for each drive */ for (j = 0; (j < dlp->dl_ndrive); ++j) { mhd_drive_t *dp = dlp->dl_drives[j]; if (strcmp(rname, dp->dr_rname) == 0) return (dp); } } /* not found */ return (NULL); } /* * list all the drives */ int mhd_list_drives( char *path, mhd_did_flags_t flags, mhd_list_res_t *resultsp, mhd_error_t *mhep ) { mhd_state_t state; uint_t ndrive, i, j, c; /* grab lock */ mhd_mx_lock(&mhd_set_mx); /* add path to list */ if (mhd_create_drives(path, mhep) != 0) { mhd_mx_unlock(&mhd_set_mx); return (-1); } /* get what we want */ state = 0; if (flags & MHD_DID_SERIAL) state |= DRIVE_SERIALING; if (flags & MHD_DID_TIME) state |= DRIVE_VTOCING; if (flags & MHD_DID_CINFO) state |= DRIVE_CINFOING; /* ident and count drives */ for (ndrive = 0, i = 0; (i < mhd_nset); ++i) { mhd_drive_set_t *sp = mhd_sets[i]; mhd_drive_list_t *dlp = &sp->sr_drives; /* count drives */ ndrive += dlp->dl_ndrive; /* ident drives */ if (state != 0) { mhd_mx_lock(&sp->sr_mx); for (j = 0; (j < dlp->dl_ndrive); ++j) { mhd_drive_t *dp = dlp->dl_drives[j]; if (mhd_state_set(dp, state, mhep) != 0) { mhd_mx_unlock(&sp->sr_mx); mhd_mx_unlock(&mhd_set_mx); return (-1); } } mhd_wait_set(sp, dlp, state); mhd_mx_unlock(&sp->sr_mx); } } /* build list */ assert(resultsp->results.mhd_drive_info_list_t_len == 0); assert(resultsp->results.mhd_drive_info_list_t_val == NULL); resultsp->results.mhd_drive_info_list_t_len = ndrive; resultsp->results.mhd_drive_info_list_t_val = Zalloc( ndrive * sizeof (*resultsp->results.mhd_drive_info_list_t_val)); for (c = 0, i = 0; (i < mhd_nset); ++i) { mhd_drive_set_t *sp = mhd_sets[i]; mhd_drive_list_t *dlp = &sp->sr_drives; mhd_mx_lock(&sp->sr_mx); for (j = 0; (j < dlp->dl_ndrive); ++j) { mhd_drive_t *dp = dlp->dl_drives[j]; mhd_drive_info_t *ip = &resultsp->results.mhd_drive_info_list_t_val[c++]; ip->dif_name = Strdup(dp->dr_rname); ip->dif_id = dp->dr_drive_id; } mhd_mx_unlock(&sp->sr_mx); } assert(c == ndrive); /* unlock, return count */ mhd_mx_unlock(&mhd_set_mx); return (ndrive); } /* * release drives */ static int mhd_release_set( mhd_drive_set_t *sp, mhd_drive_list_t *dlp, mhd_error_t *mhep ) { uint_t i; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); /* idle set */ if (mhd_idle_set(sp, dlp, mhep) != 0) return (-1); /* release drives */ for (i = 0; (i < dlp->dl_ndrive); i++) { mhd_drive_t *dp = dlp->dl_drives[i]; if (mhd_state(dp, DRIVE_RELEASING, mhep) != 0) return (-1); } mhd_wait_set(sp, dlp, DRIVE_IDLE); /* return success */ return (0); } /* * release drives in set */ int mhd_release_drives( mhd_set_t *mhsp, mhd_opts_t options, mhd_error_t *mhep ) { mhd_drive_list_t dl = mhd_null_list; mhd_drive_set_t *sp; int rval; /* grab global lock */ mhd_mx_lock(&mhd_set_mx); /* create or update set */ if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (-1); } /* lock set */ mhd_mx_lock(&sp->sr_mx); /* release drives */ rval = mhd_release_set(sp, &dl, mhep); /* unlock, return success */ out: mhd_mx_unlock(&sp->sr_mx); mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (rval); } /* * reserve drives */ static int mhd_reserve_set( mhd_drive_set_t *sp, mhd_drive_list_t *dlp, mhd_error_t *mhep ) { mhd_msec_t ff = sp->sr_timeouts.mh_ff; uint_t retry, i, ok; int rval = 0; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); /* idle set, idle everyone if cancelling failfast */ if (ff == 0) { if (mhd_idle_set(sp, &sp->sr_drives, mhep) != 0) return (-1); } else { if (mhd_idle_set(sp, dlp, mhep) != 0) return (-1); } /* * Try to take ownership of the drives twice. This helps * to avoid the situation where the other machine retakes * ownership of a majority drives back, but then kills itself * leaving no owners. */ for (retry = 0; (retry < 2); ++retry) { for (i = 0; (i < dlp->dl_ndrive); i++) { mhd_drive_t *dp = dlp->dl_drives[i]; if ((retry == 0) || ((dp->dr_state == DRIVE_ERRORED) && (dp->dr_errnum == EACCES))) { if (mhd_state(dp, DRIVE_RESERVING, mhep) != 0) return (-1); } } mhd_wait_set(sp, dlp, DRIVE_IDLE); } /* * Did the take ownership succeed on a majority of the drives? */ ok = 0; for (i = 0; (i < dlp->dl_ndrive); ++i) { mhd_drive_t *dp = dlp->dl_drives[i]; if (dp->dr_state == DRIVE_IDLE) ++ok; } /* * Let the replica majority be the deciding factor, if able to get * at least a single drive reserved. */ if (ok == 0) { rval = mhd_error(mhep, MHD_E_MAJORITY, sp->sr_name); goto out; } /* * Enable the failfast probes if we haven't given up yet. */ switch (sp->sr_ff_mode) { /* do nothing */ default: assert(0); /* FALLTHROUGH */ case MHD_FF_NONE: goto out; /* old style per drive failfast */ case MHD_FF_DRIVER: for (i = 0; (i < dlp->dl_ndrive); i++) { mhd_drive_t *dp = dlp->dl_drives[i]; if (dp->dr_state != DRIVE_ERRORED) { if (mhd_state(dp, DRIVE_FAILFASTING, mhep) != 0) { rval = -1; goto out; } } } mhd_wait_set(sp, dlp, DRIVE_IDLE); break; /* failfast probe threads */ case MHD_FF_DEBUG: case MHD_FF_HALT: case MHD_FF_PANIC: if (ff != 0) { if (mhd_ff_open(sp, mhep) != 0) { rval = -1; goto out; } for (i = 0; (i < dlp->dl_ndrive); i++) { mhd_drive_t *dp = dlp->dl_drives[i]; if (mhd_state_set(dp, DRIVE_PROBING, mhep) != 0) { rval = -1; goto out; } dp->dr_time = mhd_time(); } (void) mhd_ff_rearm(sp, mhep); } break; } /* cleanup, return success */ out: if (rval != 0) { mhd_error_t status = mhd_null_error; (void) mhd_release_set(sp, dlp, &status); mhd_clrerror(&status); } return (rval); } /* * reserve drives in set */ int mhd_reserve_drives( mhd_set_t *mhsp, mhd_mhiargs_t *timeoutp, mhd_ff_mode_t ff_mode, mhd_opts_t options, mhd_error_t *mhep ) { mhd_drive_list_t dl = mhd_null_list; mhd_drive_set_t *sp; int rval; /* grab global lock */ mhd_mx_lock(&mhd_set_mx); /* create or update set */ if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (-1); } /* lock set */ mhd_mx_lock(&sp->sr_mx); /* can't change mode or timeouts of partial set */ if ((dl.dl_ndrive != sp->sr_drives.dl_ndrive) && (options & MHD_PARTIAL_SET)) { if (ff_mode != sp->sr_ff_mode) { mhd_eprintf("%s: invalid ff_mode %d now %d\n", sp->sr_name, ff_mode, sp->sr_ff_mode); ff_mode = sp->sr_ff_mode; } if (timeoutp->mh_ff < sp->sr_timeouts.mh_ff) { mhd_eprintf("%s: invalid mh_ff %d now %d\n", sp->sr_name, timeoutp->mh_ff, sp->sr_timeouts.mh_ff); timeoutp->mh_ff = sp->sr_timeouts.mh_ff; } } /* save timouts and mode */ sp->sr_timeouts = *timeoutp; sp->sr_ff_mode = ff_mode; /* reserve drives */ rval = mhd_reserve_set(sp, &dl, mhep); /* unlock, return success */ out: mhd_mx_unlock(&sp->sr_mx); mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (rval); } /* * status drives */ static int mhd_status_set( mhd_drive_set_t *sp, mhd_drive_list_t *dlp, mhd_error_t *mhep ) { uint_t i; /* check locks */ assert(MUTEX_HELD(&mhd_set_mx)); assert(MUTEX_HELD(&sp->sr_mx)); /* status drives */ for (i = 0; (i < dlp->dl_ndrive); i++) { mhd_drive_t *dp = dlp->dl_drives[i]; if (mhd_state_set(dp, DRIVE_STATUSING, mhep) != 0) return (-1); } mhd_wait_set(sp, dlp, DRIVE_STATUSING); /* return success */ return (0); } /* * status drives in set */ int mhd_status_drives( mhd_set_t *mhsp, mhd_opts_t options, mhd_drive_status_t **status, mhd_error_t *mhep ) { mhd_drive_list_t dl = mhd_null_list; mhd_drive_list_t *dlp = &dl; mhd_drive_set_t *sp; uint_t i; int rval = 0; /* grab global lock */ mhd_mx_lock(&mhd_set_mx); /* create or update set */ if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (-1); } /* lock set */ mhd_mx_lock(&sp->sr_mx); /* status drives */ if (mhd_status_set(sp, &dl, mhep) != 0) { rval = -1; goto out; } /* build list */ *status = Zalloc(dlp->dl_ndrive * sizeof (**status)); for (i = 0; (i < dlp->dl_ndrive); ++i) { mhd_drive_t *dp = dlp->dl_drives[i]; mhd_drive_status_t *statusp = &(*status)[i]; statusp->drive = Strdup(dp->dr_rname); statusp->errnum = dp->dr_errnum; } assert(i == dlp->dl_ndrive); rval = dlp->dl_ndrive; /* unlock, return count */ out: mhd_mx_unlock(&sp->sr_mx); mhd_mx_unlock(&mhd_set_mx); mhd_free_list(&dl); return (rval); }