1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <dlfcn.h> 28 #include <errno.h> 29 #include <libintl.h> 30 #include <link.h> 31 #include <pthread.h> 32 #include <strings.h> 33 #include <unistd.h> 34 35 #include <libzfs.h> 36 37 #include <fm/libtopo.h> 38 #include <sys/fm/protocol.h> 39 #include <sys/systeminfo.h> 40 41 #include "libzfs_impl.h" 42 43 /* 44 * This file is responsible for determining the relationship between I/O 45 * devices paths and physical locations. In the world of MPxIO and external 46 * enclosures, the device path is not synonymous with the physical location. 47 * If you remove a drive and insert it into a different slot, it will end up 48 * with the same path under MPxIO. If you recable storage enclosures, the 49 * device paths may change. All of this makes it difficult to implement the 50 * 'autoreplace' property, which is supposed to automatically manage disk 51 * replacement based on physical slot. 52 * 53 * In order to work around these limitations, we have a per-vdev FRU property 54 * that is the libtopo path (minus disk-specific authority information) to the 55 * physical location of the device on the system. This is an optional 56 * property, and is only needed when using the 'autoreplace' property or when 57 * generating FMA faults against vdevs. 58 */ 59 60 /* 61 * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case 62 * it is not present. We only need this once per library instance, so it is 63 * not part of the libzfs handle. 64 */ 65 static void *_topo_dlhandle; 66 static topo_hdl_t *(*_topo_open)(int, const char *, int *); 67 static void (*_topo_close)(topo_hdl_t *); 68 static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *); 69 static void (*_topo_snap_release)(topo_hdl_t *); 70 static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *, 71 topo_walk_cb_t, void *, int *); 72 static int (*_topo_walk_step)(topo_walk_t *, int); 73 static void (*_topo_walk_fini)(topo_walk_t *); 74 static void (*_topo_hdl_strfree)(topo_hdl_t *, char *); 75 static char *(*_topo_node_name)(tnode_t *); 76 static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *, 77 char **, int *); 78 static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *); 79 static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *); 80 static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *, 81 const char *); 82 83 #define ZFS_FRU_HASH_SIZE 257 84 85 static size_t 86 fru_strhash(const char *key) 87 { 88 ulong_t g, h = 0; 89 const char *p; 90 91 for (p = key; *p != '\0'; p++) { 92 h = (h << 4) + *p; 93 94 if ((g = (h & 0xf0000000)) != 0) { 95 h ^= (g >> 24); 96 h ^= g; 97 } 98 } 99 100 return (h % ZFS_FRU_HASH_SIZE); 101 } 102 103 static int 104 libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg) 105 { 106 libzfs_handle_t *hdl = arg; 107 nvlist_t *fru; 108 char *devpath, *frustr; 109 int err; 110 libzfs_fru_t *frup; 111 size_t idx; 112 113 /* 114 * If this is the chassis node, and we don't yet have the system 115 * chassis ID, then fill in this value now. 116 */ 117 if (hdl->libzfs_chassis_id[0] == '\0' && 118 strcmp(_topo_node_name(tn), "chassis") == 0) { 119 if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY, 120 FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) 121 (void) strlcpy(hdl->libzfs_chassis_id, devpath, 122 sizeof (hdl->libzfs_chassis_id)); 123 } 124 125 /* 126 * Skip non-disk nodes. 127 */ 128 if (strcmp(_topo_node_name(tn), "disk") != 0) 129 return (TOPO_WALK_NEXT); 130 131 /* 132 * Get the devfs path and FRU. 133 */ 134 if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0) 135 return (TOPO_WALK_NEXT); 136 137 if (libzfs_fru_lookup(hdl, devpath) != NULL) { 138 _topo_hdl_strfree(thp, devpath); 139 return (TOPO_WALK_NEXT); 140 } 141 142 if (_topo_node_fru(tn, &fru, NULL, &err) != 0) { 143 _topo_hdl_strfree(thp, devpath); 144 return (TOPO_WALK_NEXT); 145 } 146 147 /* 148 * Convert the FRU into a string. 149 */ 150 if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) { 151 nvlist_free(fru); 152 _topo_hdl_strfree(thp, devpath); 153 return (TOPO_WALK_NEXT); 154 } 155 156 nvlist_free(fru); 157 158 /* 159 * Finally, we have a FRU string and device path. Add it to the hash. 160 */ 161 if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) { 162 _topo_hdl_strfree(thp, devpath); 163 _topo_hdl_strfree(thp, frustr); 164 return (TOPO_WALK_NEXT); 165 } 166 167 if ((frup->zf_device = strdup(devpath)) == NULL || 168 (frup->zf_fru = strdup(frustr)) == NULL) { 169 free(frup->zf_device); 170 free(frup); 171 _topo_hdl_strfree(thp, devpath); 172 _topo_hdl_strfree(thp, frustr); 173 return (TOPO_WALK_NEXT); 174 } 175 176 _topo_hdl_strfree(thp, devpath); 177 _topo_hdl_strfree(thp, frustr); 178 179 idx = fru_strhash(frup->zf_device); 180 frup->zf_chain = hdl->libzfs_fru_hash[idx]; 181 hdl->libzfs_fru_hash[idx] = frup; 182 frup->zf_next = hdl->libzfs_fru_list; 183 hdl->libzfs_fru_list = frup; 184 185 return (TOPO_WALK_NEXT); 186 } 187 188 /* 189 * Called during initialization to setup the dynamic libtopo connection. 190 */ 191 #pragma init(libzfs_init_fru) 192 static void 193 libzfs_init_fru(void) 194 { 195 char path[MAXPATHLEN]; 196 char isa[257]; 197 198 #if defined(_LP64) 199 if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0) 200 isa[0] = '\0'; 201 #else 202 isa[0] = '\0'; 203 #endif 204 (void) snprintf(path, sizeof (path), 205 "/usr/lib/fm/%s/libtopo.so", isa); 206 207 if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL) 208 return; 209 210 _topo_open = (topo_hdl_t *(*)()) 211 dlsym(_topo_dlhandle, "topo_open"); 212 _topo_close = (void (*)()) 213 dlsym(_topo_dlhandle, "topo_close"); 214 _topo_snap_hold = (char *(*)()) 215 dlsym(_topo_dlhandle, "topo_snap_hold"); 216 _topo_snap_release = (void (*)()) 217 dlsym(_topo_dlhandle, "topo_snap_release"); 218 _topo_walk_init = (topo_walk_t *(*)()) 219 dlsym(_topo_dlhandle, "topo_walk_init"); 220 _topo_walk_step = (int (*)()) 221 dlsym(_topo_dlhandle, "topo_walk_step"); 222 _topo_walk_fini = (void (*)()) 223 dlsym(_topo_dlhandle, "topo_walk_fini"); 224 _topo_hdl_strfree = (void (*)()) 225 dlsym(_topo_dlhandle, "topo_hdl_strfree"); 226 _topo_node_name = (char *(*)()) 227 dlsym(_topo_dlhandle, "topo_node_name"); 228 _topo_prop_get_string = (int (*)()) 229 dlsym(_topo_dlhandle, "topo_prop_get_string"); 230 _topo_node_fru = (int (*)()) 231 dlsym(_topo_dlhandle, "topo_node_fru"); 232 _topo_fmri_nvl2str = (int (*)()) 233 dlsym(_topo_dlhandle, "topo_fmri_nvl2str"); 234 _topo_fmri_strcmp_noauth = (int (*)()) 235 dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth"); 236 237 if (_topo_open == NULL || _topo_close == NULL || 238 _topo_snap_hold == NULL || _topo_snap_release == NULL || 239 _topo_walk_init == NULL || _topo_walk_step == NULL || 240 _topo_walk_fini == NULL || _topo_hdl_strfree == NULL || 241 _topo_node_name == NULL || _topo_prop_get_string == NULL || 242 _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL || 243 _topo_fmri_strcmp_noauth == NULL) { 244 (void) dlclose(_topo_dlhandle); 245 _topo_dlhandle = NULL; 246 } 247 } 248 249 /* 250 * Refresh the mappings from device path -> FMRI. We do this by walking the 251 * hc topology looking for disk nodes, and recording the io/devfs-path and FRU. 252 * Note that we strip out the disk-specific authority information (serial, 253 * part, revision, etc) so that we are left with only the identifying 254 * characteristics of the slot (hc path and chassis-id). 255 */ 256 void 257 libzfs_fru_refresh(libzfs_handle_t *hdl) 258 { 259 int err; 260 char *uuid; 261 topo_hdl_t *thp; 262 topo_walk_t *twp; 263 264 if (_topo_dlhandle == NULL) 265 return; 266 267 /* 268 * Clear the FRU hash and initialize our basic structures. 269 */ 270 libzfs_fru_clear(hdl, B_FALSE); 271 272 if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION, 273 NULL, &err)) == NULL) 274 return; 275 276 thp = hdl->libzfs_topo_hdl; 277 278 if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL) 279 return; 280 281 _topo_hdl_strfree(thp, uuid); 282 283 if (hdl->libzfs_fru_hash == NULL && 284 (hdl->libzfs_fru_hash = 285 calloc(ZFS_FRU_HASH_SIZE, sizeof (void *))) == NULL) 286 return; 287 288 /* 289 * We now have a topo snapshot, so iterate over the hc topology looking 290 * for disks to add to the hash. 291 */ 292 twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC, 293 libzfs_fru_gather, hdl, &err); 294 if (twp != NULL) { 295 (void) _topo_walk_step(twp, TOPO_WALK_CHILD); 296 _topo_walk_fini(twp); 297 } 298 } 299 300 /* 301 * Given a devfs path, return the FRU for the device, if known. This will 302 * automatically call libzfs_fru_refresh() if it hasn't already been called by 303 * the consumer. The string returned is valid until the next call to 304 * libzfs_fru_refresh(). 305 */ 306 const char * 307 libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath) 308 { 309 size_t idx = fru_strhash(devpath); 310 libzfs_fru_t *frup; 311 312 if (hdl->libzfs_fru_hash == NULL) 313 libzfs_fru_refresh(hdl); 314 315 if (hdl->libzfs_fru_hash == NULL) 316 return (NULL); 317 318 for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; 319 frup = frup->zf_chain) { 320 if (strcmp(devpath, frup->zf_device) == 0) 321 return (frup->zf_fru); 322 } 323 324 return (NULL); 325 } 326 327 /* 328 * Given a fru path, return the device path. This will automatically call 329 * libzfs_fru_refresh() if it hasn't already been called by the consumer. The 330 * string returned is valid until the next call to libzfs_fru_refresh(). 331 */ 332 const char * 333 libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) 334 { 335 libzfs_fru_t *frup; 336 size_t idx; 337 338 if (hdl->libzfs_fru_hash == NULL) 339 libzfs_fru_refresh(hdl); 340 341 if (hdl->libzfs_fru_hash == NULL) 342 return (NULL); 343 344 for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) { 345 for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; 346 frup = frup->zf_next) { 347 if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, 348 fru, frup->zf_fru)) 349 return (frup->zf_device); 350 } 351 } 352 353 return (NULL); 354 } 355 356 /* 357 * Change the stored FRU for the given vdev. 358 */ 359 int 360 zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) 361 { 362 zfs_cmd_t zc = { 0 }; 363 364 (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); 365 (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); 366 zc.zc_guid = vdev_guid; 367 368 if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0) 369 return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, 370 dgettext(TEXT_DOMAIN, "cannot set FRU"))); 371 372 return (0); 373 } 374 375 /* 376 * Compare to two FRUs, ignoring any authority information. 377 */ 378 boolean_t 379 libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b) 380 { 381 if (hdl->libzfs_fru_hash == NULL) 382 libzfs_fru_refresh(hdl); 383 384 if (hdl->libzfs_fru_hash == NULL) 385 return (strcmp(a, b) == 0); 386 387 return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b)); 388 } 389 390 /* 391 * This special function checks to see whether the FRU indicates it's supposed 392 * to be in the system chassis, but the chassis-id doesn't match. This can 393 * happen in a clustered case, where both head nodes have the same logical 394 * disk, but opening the device on the other head node is meaningless. 395 */ 396 boolean_t 397 libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru) 398 { 399 const char *chassisid; 400 size_t len; 401 402 if (hdl->libzfs_fru_hash == NULL) 403 libzfs_fru_refresh(hdl); 404 405 if (hdl->libzfs_chassis_id[0] == '\0') 406 return (B_FALSE); 407 408 if (strstr(fru, "/chassis=0/") == NULL) 409 return (B_FALSE); 410 411 if ((chassisid = strstr(fru, ":chassis-id=")) == NULL) 412 return (B_FALSE); 413 414 chassisid += 12; 415 len = strlen(hdl->libzfs_chassis_id); 416 if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 && 417 (chassisid[len] == '/' || chassisid[len] == ':')) 418 return (B_FALSE); 419 420 return (B_TRUE); 421 } 422 423 /* 424 * Clear memory associated with the FRU hash. 425 */ 426 void 427 libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) 428 { 429 libzfs_fru_t *frup; 430 431 while ((frup = hdl->libzfs_fru_list) != NULL) { 432 hdl->libzfs_fru_list = frup->zf_next; 433 free(frup->zf_device); 434 free(frup->zf_fru); 435 free(frup); 436 } 437 438 hdl->libzfs_fru_list = NULL; 439 440 if (hdl->libzfs_topo_hdl != NULL) { 441 _topo_snap_release(hdl->libzfs_topo_hdl); 442 _topo_close(hdl->libzfs_topo_hdl); 443 hdl->libzfs_topo_hdl = NULL; 444 } 445 446 if (final) { 447 free(hdl->libzfs_fru_hash); 448 } else if (hdl->libzfs_fru_hash != NULL) { 449 bzero(hdl->libzfs_fru_hash, 450 ZFS_FRU_HASH_SIZE * sizeof (void *)); 451 } 452 } 453