1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <libzfs.h> 26 27 #include <sys/zfs_context.h> 28 29 #include <errno.h> 30 #include <fcntl.h> 31 #include <stdarg.h> 32 #include <stddef.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <strings.h> 36 #include <sys/file.h> 37 #include <sys/mntent.h> 38 #include <sys/mnttab.h> 39 #include <sys/param.h> 40 #include <sys/stat.h> 41 42 #include <sys/dmu.h> 43 #include <sys/dmu_objset.h> 44 #include <sys/dnode.h> 45 #include <sys/vdev_impl.h> 46 47 #include <sys/mkdev.h> 48 49 #include "zinject.h" 50 51 extern void kernel_init(int); 52 extern void kernel_fini(void); 53 54 static int debug; 55 56 static void 57 ziprintf(const char *fmt, ...) 58 { 59 va_list ap; 60 61 if (!debug) 62 return; 63 64 va_start(ap, fmt); 65 (void) vprintf(fmt, ap); 66 va_end(ap); 67 } 68 69 static void 70 compress_slashes(const char *src, char *dest) 71 { 72 while (*src != '\0') { 73 *dest = *src++; 74 while (*dest == '/' && *src == '/') 75 ++src; 76 ++dest; 77 } 78 *dest = '\0'; 79 } 80 81 /* 82 * Given a full path to a file, translate into a dataset name and a relative 83 * path within the dataset. 'dataset' must be at least MAXNAMELEN characters, 84 * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64 85 * buffer, which we need later to get the object ID. 86 */ 87 static int 88 parse_pathname(const char *inpath, char *dataset, char *relpath, 89 struct stat64 *statbuf) 90 { 91 struct extmnttab mp; 92 FILE *fp; 93 int match; 94 const char *rel; 95 char fullpath[MAXPATHLEN]; 96 97 compress_slashes(inpath, fullpath); 98 99 if (fullpath[0] != '/') { 100 (void) fprintf(stderr, "invalid object '%s': must be full " 101 "path\n", fullpath); 102 usage(); 103 return (-1); 104 } 105 106 if (strlen(fullpath) >= MAXPATHLEN) { 107 (void) fprintf(stderr, "invalid object; pathname too long\n"); 108 return (-1); 109 } 110 111 if (stat64(fullpath, statbuf) != 0) { 112 (void) fprintf(stderr, "cannot open '%s': %s\n", 113 fullpath, strerror(errno)); 114 return (-1); 115 } 116 117 if ((fp = fopen(MNTTAB, "r")) == NULL) { 118 (void) fprintf(stderr, "cannot open /etc/mnttab\n"); 119 return (-1); 120 } 121 122 match = 0; 123 while (getextmntent(fp, &mp, sizeof (mp)) == 0) { 124 if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) { 125 match = 1; 126 break; 127 } 128 } 129 130 if (!match) { 131 (void) fprintf(stderr, "cannot find mountpoint for '%s'\n", 132 fullpath); 133 return (-1); 134 } 135 136 if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) { 137 (void) fprintf(stderr, "invalid path '%s': not a ZFS " 138 "filesystem\n", fullpath); 139 return (-1); 140 } 141 142 if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) { 143 (void) fprintf(stderr, "invalid path '%s': mountpoint " 144 "doesn't match path\n", fullpath); 145 return (-1); 146 } 147 148 (void) strcpy(dataset, mp.mnt_special); 149 150 rel = fullpath + strlen(mp.mnt_mountp); 151 if (rel[0] == '/') 152 rel++; 153 (void) strcpy(relpath, rel); 154 155 return (0); 156 } 157 158 /* 159 * Convert from a (dataset, path) pair into a (objset, object) pair. Note that 160 * we grab the object number from the inode number, since looking this up via 161 * libzpool is a real pain. 162 */ 163 /* ARGSUSED */ 164 static int 165 object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, 166 zinject_record_t *record) 167 { 168 objset_t *os; 169 int err; 170 171 /* 172 * Before doing any libzpool operations, call sync() to ensure that the 173 * on-disk state is consistent with the in-core state. 174 */ 175 sync(); 176 177 err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); 178 if (err != 0) { 179 (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 180 dataset, strerror(err)); 181 return (-1); 182 } 183 184 record->zi_objset = dmu_objset_id(os); 185 record->zi_object = statbuf->st_ino; 186 187 dmu_objset_disown(os, FTAG); 188 189 return (0); 190 } 191 192 /* 193 * Calculate the real range based on the type, level, and range given. 194 */ 195 static int 196 calculate_range(const char *dataset, err_type_t type, int level, char *range, 197 zinject_record_t *record) 198 { 199 objset_t *os = NULL; 200 dnode_t *dn = NULL; 201 int err; 202 int ret = -1; 203 204 /* 205 * Determine the numeric range from the string. 206 */ 207 if (range == NULL) { 208 /* 209 * If range is unspecified, set the range to [0,-1], which 210 * indicates that the whole object should be treated as an 211 * error. 212 */ 213 record->zi_start = 0; 214 record->zi_end = -1ULL; 215 } else { 216 char *end; 217 218 /* XXX add support for suffixes */ 219 record->zi_start = strtoull(range, &end, 10); 220 221 222 if (*end == '\0') 223 record->zi_end = record->zi_start + 1; 224 else if (*end == ',') 225 record->zi_end = strtoull(end + 1, &end, 10); 226 227 if (*end != '\0') { 228 (void) fprintf(stderr, "invalid range '%s': must be " 229 "a numeric range of the form 'start[,end]'\n", 230 range); 231 goto out; 232 } 233 } 234 235 switch (type) { 236 case TYPE_DATA: 237 break; 238 239 case TYPE_DNODE: 240 /* 241 * If this is a request to inject faults into the dnode, then we 242 * must translate the current (objset,object) pair into an 243 * offset within the metadnode for the objset. Specifying any 244 * kind of range with type 'dnode' is illegal. 245 */ 246 if (range != NULL) { 247 (void) fprintf(stderr, "range cannot be specified when " 248 "type is 'dnode'\n"); 249 goto out; 250 } 251 252 record->zi_start = record->zi_object * sizeof (dnode_phys_t); 253 record->zi_end = record->zi_start + sizeof (dnode_phys_t); 254 record->zi_object = 0; 255 break; 256 } 257 258 /* 259 * Get the dnode associated with object, so we can calculate the block 260 * size. 261 */ 262 if ((err = dmu_objset_own(dataset, DMU_OST_ANY, 263 B_TRUE, FTAG, &os)) != 0) { 264 (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 265 dataset, strerror(err)); 266 goto out; 267 } 268 269 if (record->zi_object == 0) { 270 dn = os->os_meta_dnode; 271 } else { 272 err = dnode_hold(os, record->zi_object, FTAG, &dn); 273 if (err != 0) { 274 (void) fprintf(stderr, "failed to hold dnode " 275 "for object %llu\n", 276 (u_longlong_t)record->zi_object); 277 goto out; 278 } 279 } 280 281 282 ziprintf("data shift: %d\n", (int)dn->dn_datablkshift); 283 ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift); 284 285 /* 286 * Translate range into block IDs. 287 */ 288 if (record->zi_start != 0 || record->zi_end != -1ULL) { 289 record->zi_start >>= dn->dn_datablkshift; 290 record->zi_end >>= dn->dn_datablkshift; 291 } 292 293 /* 294 * Check level, and then translate level 0 blkids into ranges 295 * appropriate for level of indirection. 296 */ 297 record->zi_level = level; 298 if (level > 0) { 299 ziprintf("level 0 blkid range: [%llu, %llu]\n", 300 record->zi_start, record->zi_end); 301 302 if (level >= dn->dn_nlevels) { 303 (void) fprintf(stderr, "level %d exceeds max level " 304 "of object (%d)\n", level, dn->dn_nlevels - 1); 305 goto out; 306 } 307 308 if (record->zi_start != 0 || record->zi_end != 0) { 309 int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 310 311 for (; level > 0; level--) { 312 record->zi_start >>= shift; 313 record->zi_end >>= shift; 314 } 315 } 316 } 317 318 ret = 0; 319 out: 320 if (dn) { 321 if (dn != os->os_meta_dnode) 322 dnode_rele(dn, FTAG); 323 } 324 if (os) 325 dmu_objset_disown(os, FTAG); 326 327 return (ret); 328 } 329 330 int 331 translate_record(err_type_t type, const char *object, const char *range, 332 int level, zinject_record_t *record, char *poolname, char *dataset) 333 { 334 char path[MAXPATHLEN]; 335 char *slash; 336 struct stat64 statbuf; 337 int ret = -1; 338 339 kernel_init(FREAD); 340 341 debug = (getenv("ZINJECT_DEBUG") != NULL); 342 343 ziprintf("translating: %s\n", object); 344 345 if (MOS_TYPE(type)) { 346 /* 347 * MOS objects are treated specially. 348 */ 349 switch (type) { 350 case TYPE_MOS: 351 record->zi_type = 0; 352 break; 353 case TYPE_MOSDIR: 354 record->zi_type = DMU_OT_OBJECT_DIRECTORY; 355 break; 356 case TYPE_METASLAB: 357 record->zi_type = DMU_OT_OBJECT_ARRAY; 358 break; 359 case TYPE_CONFIG: 360 record->zi_type = DMU_OT_PACKED_NVLIST; 361 break; 362 case TYPE_BPLIST: 363 record->zi_type = DMU_OT_BPLIST; 364 break; 365 case TYPE_SPACEMAP: 366 record->zi_type = DMU_OT_SPACE_MAP; 367 break; 368 case TYPE_ERRLOG: 369 record->zi_type = DMU_OT_ERROR_LOG; 370 break; 371 } 372 373 dataset[0] = '\0'; 374 (void) strcpy(poolname, object); 375 return (0); 376 } 377 378 /* 379 * Convert a full path into a (dataset, file) pair. 380 */ 381 if (parse_pathname(object, dataset, path, &statbuf) != 0) 382 goto err; 383 384 ziprintf(" dataset: %s\n", dataset); 385 ziprintf(" path: %s\n", path); 386 387 /* 388 * Convert (dataset, file) into (objset, object) 389 */ 390 if (object_from_path(dataset, path, &statbuf, record) != 0) 391 goto err; 392 393 ziprintf("raw objset: %llu\n", record->zi_objset); 394 ziprintf("raw object: %llu\n", record->zi_object); 395 396 /* 397 * For the given object, calculate the real (type, level, range) 398 */ 399 if (calculate_range(dataset, type, level, (char *)range, record) != 0) 400 goto err; 401 402 ziprintf(" objset: %llu\n", record->zi_objset); 403 ziprintf(" object: %llu\n", record->zi_object); 404 if (record->zi_start == 0 && 405 record->zi_end == -1ULL) 406 ziprintf(" range: all\n"); 407 else 408 ziprintf(" range: [%llu, %llu]\n", record->zi_start, 409 record->zi_end); 410 411 /* 412 * Copy the pool name 413 */ 414 (void) strcpy(poolname, dataset); 415 if ((slash = strchr(poolname, '/')) != NULL) 416 *slash = '\0'; 417 418 ret = 0; 419 420 err: 421 kernel_fini(); 422 return (ret); 423 } 424 425 int 426 translate_raw(const char *str, zinject_record_t *record) 427 { 428 /* 429 * A raw bookmark of the form objset:object:level:blkid, where each 430 * number is a hexidecimal value. 431 */ 432 if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset, 433 (u_longlong_t *)&record->zi_object, &record->zi_level, 434 (u_longlong_t *)&record->zi_start) != 4) { 435 (void) fprintf(stderr, "bad raw spec '%s': must be of the form " 436 "'objset:object:level:blkid'\n", str); 437 return (-1); 438 } 439 440 record->zi_end = record->zi_start; 441 442 return (0); 443 } 444 445 int 446 translate_device(const char *pool, const char *device, err_type_t label_type, 447 zinject_record_t *record) 448 { 449 char *end; 450 zpool_handle_t *zhp; 451 nvlist_t *tgt; 452 boolean_t isspare, iscache; 453 454 /* 455 * Given a device name or GUID, create an appropriate injection record 456 * with zi_guid set. 457 */ 458 if ((zhp = zpool_open(g_zfs, pool)) == NULL) 459 return (-1); 460 461 record->zi_guid = strtoull(device, &end, 16); 462 if (record->zi_guid == 0 || *end != '\0') { 463 tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL); 464 465 if (tgt == NULL) { 466 (void) fprintf(stderr, "cannot find device '%s' in " 467 "pool '%s'\n", device, pool); 468 return (-1); 469 } 470 471 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, 472 &record->zi_guid) == 0); 473 } 474 475 switch (label_type) { 476 case TYPE_LABEL_UBERBLOCK: 477 record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]); 478 record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1; 479 break; 480 case TYPE_LABEL_NVLIST: 481 record->zi_start = offsetof(vdev_label_t, vl_vdev_phys); 482 record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1; 483 break; 484 case TYPE_LABEL_PAD1: 485 record->zi_start = offsetof(vdev_label_t, vl_pad1); 486 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 487 break; 488 case TYPE_LABEL_PAD2: 489 record->zi_start = offsetof(vdev_label_t, vl_pad2); 490 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 491 break; 492 } 493 return (0); 494 } 495