1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 */ 25 26 #include <libzfs.h> 27 28 #include <sys/zfs_context.h> 29 30 #include <errno.h> 31 #include <fcntl.h> 32 #include <stdarg.h> 33 #include <stddef.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <strings.h> 37 #include <sys/file.h> 38 #include <sys/mntent.h> 39 #include <sys/mnttab.h> 40 #include <sys/param.h> 41 #include <sys/stat.h> 42 43 #include <sys/dmu.h> 44 #include <sys/dmu_objset.h> 45 #include <sys/dnode.h> 46 #include <sys/vdev_impl.h> 47 48 #include <sys/mkdev.h> 49 50 #include "zinject.h" 51 52 extern void kernel_init(int); 53 extern void kernel_fini(void); 54 55 static int debug; 56 57 static void 58 ziprintf(const char *fmt, ...) 59 { 60 va_list ap; 61 62 if (!debug) 63 return; 64 65 va_start(ap, fmt); 66 (void) vprintf(fmt, ap); 67 va_end(ap); 68 } 69 70 static void 71 compress_slashes(const char *src, char *dest) 72 { 73 while (*src != '\0') { 74 *dest = *src++; 75 while (*dest == '/' && *src == '/') 76 ++src; 77 ++dest; 78 } 79 *dest = '\0'; 80 } 81 82 /* 83 * Given a full path to a file, translate into a dataset name and a relative 84 * path within the dataset. 'dataset' must be at least MAXNAMELEN characters, 85 * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64 86 * buffer, which we need later to get the object ID. 87 */ 88 static int 89 parse_pathname(const char *inpath, char *dataset, char *relpath, 90 struct stat64 *statbuf) 91 { 92 struct extmnttab mp; 93 FILE *fp; 94 int match; 95 const char *rel; 96 char fullpath[MAXPATHLEN]; 97 98 compress_slashes(inpath, fullpath); 99 100 if (fullpath[0] != '/') { 101 (void) fprintf(stderr, "invalid object '%s': must be full " 102 "path\n", fullpath); 103 usage(); 104 return (-1); 105 } 106 107 if (strlen(fullpath) >= MAXPATHLEN) { 108 (void) fprintf(stderr, "invalid object; pathname too long\n"); 109 return (-1); 110 } 111 112 if (stat64(fullpath, statbuf) != 0) { 113 (void) fprintf(stderr, "cannot open '%s': %s\n", 114 fullpath, strerror(errno)); 115 return (-1); 116 } 117 118 if ((fp = fopen(MNTTAB, "r")) == NULL) { 119 (void) fprintf(stderr, "cannot open /etc/mnttab\n"); 120 return (-1); 121 } 122 123 match = 0; 124 while (getextmntent(fp, &mp, sizeof (mp)) == 0) { 125 if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) { 126 match = 1; 127 break; 128 } 129 } 130 131 if (!match) { 132 (void) fprintf(stderr, "cannot find mountpoint for '%s'\n", 133 fullpath); 134 return (-1); 135 } 136 137 if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) { 138 (void) fprintf(stderr, "invalid path '%s': not a ZFS " 139 "filesystem\n", fullpath); 140 return (-1); 141 } 142 143 if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) { 144 (void) fprintf(stderr, "invalid path '%s': mountpoint " 145 "doesn't match path\n", fullpath); 146 return (-1); 147 } 148 149 (void) strcpy(dataset, mp.mnt_special); 150 151 rel = fullpath + strlen(mp.mnt_mountp); 152 if (rel[0] == '/') 153 rel++; 154 (void) strcpy(relpath, rel); 155 156 return (0); 157 } 158 159 /* 160 * Convert from a (dataset, path) pair into a (objset, object) pair. Note that 161 * we grab the object number from the inode number, since looking this up via 162 * libzpool is a real pain. 163 */ 164 /* ARGSUSED */ 165 static int 166 object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, 167 zinject_record_t *record) 168 { 169 objset_t *os; 170 int err; 171 172 /* 173 * Before doing any libzpool operations, call sync() to ensure that the 174 * on-disk state is consistent with the in-core state. 175 */ 176 sync(); 177 178 err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); 179 if (err != 0) { 180 (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 181 dataset, strerror(err)); 182 return (-1); 183 } 184 185 record->zi_objset = dmu_objset_id(os); 186 record->zi_object = statbuf->st_ino; 187 188 dmu_objset_disown(os, FTAG); 189 190 return (0); 191 } 192 193 /* 194 * Calculate the real range based on the type, level, and range given. 195 */ 196 static int 197 calculate_range(const char *dataset, err_type_t type, int level, char *range, 198 zinject_record_t *record) 199 { 200 objset_t *os = NULL; 201 dnode_t *dn = NULL; 202 int err; 203 int ret = -1; 204 205 /* 206 * Determine the numeric range from the string. 207 */ 208 if (range == NULL) { 209 /* 210 * If range is unspecified, set the range to [0,-1], which 211 * indicates that the whole object should be treated as an 212 * error. 213 */ 214 record->zi_start = 0; 215 record->zi_end = -1ULL; 216 } else { 217 char *end; 218 219 /* XXX add support for suffixes */ 220 record->zi_start = strtoull(range, &end, 10); 221 222 223 if (*end == '\0') 224 record->zi_end = record->zi_start + 1; 225 else if (*end == ',') 226 record->zi_end = strtoull(end + 1, &end, 10); 227 228 if (*end != '\0') { 229 (void) fprintf(stderr, "invalid range '%s': must be " 230 "a numeric range of the form 'start[,end]'\n", 231 range); 232 goto out; 233 } 234 } 235 236 switch (type) { 237 case TYPE_DATA: 238 break; 239 240 case TYPE_DNODE: 241 /* 242 * If this is a request to inject faults into the dnode, then we 243 * must translate the current (objset,object) pair into an 244 * offset within the metadnode for the objset. Specifying any 245 * kind of range with type 'dnode' is illegal. 246 */ 247 if (range != NULL) { 248 (void) fprintf(stderr, "range cannot be specified when " 249 "type is 'dnode'\n"); 250 goto out; 251 } 252 253 record->zi_start = record->zi_object * sizeof (dnode_phys_t); 254 record->zi_end = record->zi_start + sizeof (dnode_phys_t); 255 record->zi_object = 0; 256 break; 257 } 258 259 /* 260 * Get the dnode associated with object, so we can calculate the block 261 * size. 262 */ 263 if ((err = dmu_objset_own(dataset, DMU_OST_ANY, 264 B_TRUE, FTAG, &os)) != 0) { 265 (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 266 dataset, strerror(err)); 267 goto out; 268 } 269 270 if (record->zi_object == 0) { 271 dn = DMU_META_DNODE(os); 272 } else { 273 err = dnode_hold(os, record->zi_object, FTAG, &dn); 274 if (err != 0) { 275 (void) fprintf(stderr, "failed to hold dnode " 276 "for object %llu\n", 277 (u_longlong_t)record->zi_object); 278 goto out; 279 } 280 } 281 282 283 ziprintf("data shift: %d\n", (int)dn->dn_datablkshift); 284 ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift); 285 286 /* 287 * Translate range into block IDs. 288 */ 289 if (record->zi_start != 0 || record->zi_end != -1ULL) { 290 record->zi_start >>= dn->dn_datablkshift; 291 record->zi_end >>= dn->dn_datablkshift; 292 } 293 294 /* 295 * Check level, and then translate level 0 blkids into ranges 296 * appropriate for level of indirection. 297 */ 298 record->zi_level = level; 299 if (level > 0) { 300 ziprintf("level 0 blkid range: [%llu, %llu]\n", 301 record->zi_start, record->zi_end); 302 303 if (level >= dn->dn_nlevels) { 304 (void) fprintf(stderr, "level %d exceeds max level " 305 "of object (%d)\n", level, dn->dn_nlevels - 1); 306 goto out; 307 } 308 309 if (record->zi_start != 0 || record->zi_end != 0) { 310 int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 311 312 for (; level > 0; level--) { 313 record->zi_start >>= shift; 314 record->zi_end >>= shift; 315 } 316 } 317 } 318 319 ret = 0; 320 out: 321 if (dn) { 322 if (dn != DMU_META_DNODE(os)) 323 dnode_rele(dn, FTAG); 324 } 325 if (os) 326 dmu_objset_disown(os, FTAG); 327 328 return (ret); 329 } 330 331 int 332 translate_record(err_type_t type, const char *object, const char *range, 333 int level, zinject_record_t *record, char *poolname, char *dataset) 334 { 335 char path[MAXPATHLEN]; 336 char *slash; 337 struct stat64 statbuf; 338 int ret = -1; 339 340 kernel_init(FREAD); 341 342 debug = (getenv("ZINJECT_DEBUG") != NULL); 343 344 ziprintf("translating: %s\n", object); 345 346 if (MOS_TYPE(type)) { 347 /* 348 * MOS objects are treated specially. 349 */ 350 switch (type) { 351 case TYPE_MOS: 352 record->zi_type = 0; 353 break; 354 case TYPE_MOSDIR: 355 record->zi_type = DMU_OT_OBJECT_DIRECTORY; 356 break; 357 case TYPE_METASLAB: 358 record->zi_type = DMU_OT_OBJECT_ARRAY; 359 break; 360 case TYPE_CONFIG: 361 record->zi_type = DMU_OT_PACKED_NVLIST; 362 break; 363 case TYPE_BPOBJ: 364 record->zi_type = DMU_OT_BPOBJ; 365 break; 366 case TYPE_SPACEMAP: 367 record->zi_type = DMU_OT_SPACE_MAP; 368 break; 369 case TYPE_ERRLOG: 370 record->zi_type = DMU_OT_ERROR_LOG; 371 break; 372 } 373 374 dataset[0] = '\0'; 375 (void) strcpy(poolname, object); 376 return (0); 377 } 378 379 /* 380 * Convert a full path into a (dataset, file) pair. 381 */ 382 if (parse_pathname(object, dataset, path, &statbuf) != 0) 383 goto err; 384 385 ziprintf(" dataset: %s\n", dataset); 386 ziprintf(" path: %s\n", path); 387 388 /* 389 * Convert (dataset, file) into (objset, object) 390 */ 391 if (object_from_path(dataset, path, &statbuf, record) != 0) 392 goto err; 393 394 ziprintf("raw objset: %llu\n", record->zi_objset); 395 ziprintf("raw object: %llu\n", record->zi_object); 396 397 /* 398 * For the given object, calculate the real (type, level, range) 399 */ 400 if (calculate_range(dataset, type, level, (char *)range, record) != 0) 401 goto err; 402 403 ziprintf(" objset: %llu\n", record->zi_objset); 404 ziprintf(" object: %llu\n", record->zi_object); 405 if (record->zi_start == 0 && 406 record->zi_end == -1ULL) 407 ziprintf(" range: all\n"); 408 else 409 ziprintf(" range: [%llu, %llu]\n", record->zi_start, 410 record->zi_end); 411 412 /* 413 * Copy the pool name 414 */ 415 (void) strcpy(poolname, dataset); 416 if ((slash = strchr(poolname, '/')) != NULL) 417 *slash = '\0'; 418 419 ret = 0; 420 421 err: 422 kernel_fini(); 423 return (ret); 424 } 425 426 int 427 translate_raw(const char *str, zinject_record_t *record) 428 { 429 /* 430 * A raw bookmark of the form objset:object:level:blkid, where each 431 * number is a hexidecimal value. 432 */ 433 if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset, 434 (u_longlong_t *)&record->zi_object, &record->zi_level, 435 (u_longlong_t *)&record->zi_start) != 4) { 436 (void) fprintf(stderr, "bad raw spec '%s': must be of the form " 437 "'objset:object:level:blkid'\n", str); 438 return (-1); 439 } 440 441 record->zi_end = record->zi_start; 442 443 return (0); 444 } 445 446 int 447 translate_device(const char *pool, const char *device, err_type_t label_type, 448 zinject_record_t *record) 449 { 450 char *end; 451 zpool_handle_t *zhp; 452 nvlist_t *tgt; 453 boolean_t isspare, iscache; 454 455 /* 456 * Given a device name or GUID, create an appropriate injection record 457 * with zi_guid set. 458 */ 459 if ((zhp = zpool_open(g_zfs, pool)) == NULL) 460 return (-1); 461 462 record->zi_guid = strtoull(device, &end, 16); 463 if (record->zi_guid == 0 || *end != '\0') { 464 tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL); 465 466 if (tgt == NULL) { 467 (void) fprintf(stderr, "cannot find device '%s' in " 468 "pool '%s'\n", device, pool); 469 return (-1); 470 } 471 472 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, 473 &record->zi_guid) == 0); 474 } 475 476 /* 477 * Device faults can take on three different forms: 478 * 1). delayed or hanging I/O 479 * 2). zfs label faults 480 * 3). generic disk faults 481 */ 482 if (record->zi_timer != 0) { 483 record->zi_cmd = ZINJECT_DELAY_IO; 484 } else if (label_type != TYPE_INVAL) { 485 record->zi_cmd = ZINJECT_LABEL_FAULT; 486 } else { 487 record->zi_cmd = ZINJECT_DEVICE_FAULT; 488 } 489 490 switch (label_type) { 491 case TYPE_LABEL_UBERBLOCK: 492 record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]); 493 record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1; 494 break; 495 case TYPE_LABEL_NVLIST: 496 record->zi_start = offsetof(vdev_label_t, vl_vdev_phys); 497 record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1; 498 break; 499 case TYPE_LABEL_PAD1: 500 record->zi_start = offsetof(vdev_label_t, vl_pad1); 501 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 502 break; 503 case TYPE_LABEL_PAD2: 504 record->zi_start = offsetof(vdev_label_t, vl_pad2); 505 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 506 break; 507 } 508 return (0); 509 } 510