/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2018 by Delphix. All rights reserved.
 */


#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/file.h>
#include <sys/vfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_dir.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/byteorder.h>
#include <sys/policy.h>
#include <sys/stat.h>
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/dbuf.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
#include <sys/dsl_dataset.h>

/*
 * These zfs_log_* functions must be called within a dmu tx, in one
 * of two contexts depending on zilog->z_replay:
 *
 * Non-replay mode
 * ---------------
 * We need to record the transaction so that if it is committed to
 * the Intent Log then it can be replayed.  An intent log transaction
 * structure (itx_t) is allocated and all the information necessary to
 * possibly replay the transaction is saved in it.  The itx is then assigned
 * a sequence number and inserted in the in-memory list anchored in the zilog.
 *
 * Replay mode
 * -----------
 * We need to mark the intent log record as replayed in the log header.
 * This is done in the same transaction as the replay so that they
 * commit atomically.
 */

int
zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
{
	int isxvattr = (vap->va_mask & ATTR_XVATTR);
	switch (type) {
	case Z_FILE:
		if (vsecp == NULL && !isxvattr)
			return (TX_CREATE);
		if (vsecp && isxvattr)
			return (TX_CREATE_ACL_ATTR);
		if (vsecp)
			return (TX_CREATE_ACL);
		else
			return (TX_CREATE_ATTR);
		/*NOTREACHED*/
	case Z_DIR:
		if (vsecp == NULL && !isxvattr)
			return (TX_MKDIR);
		if (vsecp && isxvattr)
			return (TX_MKDIR_ACL_ATTR);
		if (vsecp)
			return (TX_MKDIR_ACL);
		else
			return (TX_MKDIR_ATTR);
	case Z_XATTRDIR:
		return (TX_MKXATTR);
	}
	ASSERT(0);
	return (TX_MAX_TYPE);
}

/*
 * Build up the log data necessary for logging an xvattr_t.
 * First the lr_attr_t is initialized.  Following the lr_attr_t
 * are the mapsize and attribute bitmap copied from the xvattr_t.
 * Following the bitmap and bitmapsize, two 64 bit words are reserved
 * for the create time, which may be set.  Following the create time
 * is a single 64 bit integer which has the bits to set on
 * replay for the xvattr.
 */
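/*
 * Illustrative layout of the record body described above (a sketch only;
 * the authoritative sizing macro is ZIL_XVAT_SIZE()):
 *
 *	lr_attr_t {
 *		lr_attr_masksize	(uint32_t, == xva_mapsize)
 *		lr_attr_bitmap		(uint32_t[xva_mapsize], requested attrs)
 *	}
 *	attrs				(uint64_t, XAT0_* bits to apply on replay)
 *	crtime				(uint64_t[2], encoded create time)
 *	scanstamp / projid		(shared space, AV_SCANSTAMP_SZ bytes)
 */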
static void
zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
	uint32_t *bitmap;
	uint64_t *attrs;
	uint64_t *crtime;
	xoptattr_t *xoap;
	void *scanstamp;
	int i;

	xoap = xva_getxoptattr(xvap);
	ASSERT(xoap);

	lrattr->lr_attr_masksize = xvap->xva_mapsize;
	bitmap = &lrattr->lr_attr_bitmap;
	for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
		*bitmap = xvap->xva_reqattrmap[i];
	}

	/* Now pack the attributes up in a single uint64_t */
	attrs = (uint64_t *)bitmap;
	crtime = attrs + 1;
	scanstamp = (caddr_t)(crtime + 2);
	*attrs = 0;
	if (XVA_ISSET_REQ(xvap, XAT_READONLY))
		*attrs |= (xoap->xoa_readonly == 0) ? 0 :
		    XAT0_READONLY;
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
		*attrs |= (xoap->xoa_hidden == 0) ? 0 :
		    XAT0_HIDDEN;
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
		*attrs |= (xoap->xoa_system == 0) ? 0 :
		    XAT0_SYSTEM;
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
		*attrs |= (xoap->xoa_archive == 0) ? 0 :
		    XAT0_ARCHIVE;
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
		*attrs |= (xoap->xoa_immutable == 0) ? 0 :
		    XAT0_IMMUTABLE;
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
		*attrs |= (xoap->xoa_nounlink == 0) ? 0 :
		    XAT0_NOUNLINK;
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
		*attrs |= (xoap->xoa_appendonly == 0) ? 0 :
		    XAT0_APPENDONLY;
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
		*attrs |= (xoap->xoa_opaque == 0) ? 0 :
		    XAT0_APPENDONLY;
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
		*attrs |= (xoap->xoa_nodump == 0) ? 0 :
		    XAT0_NODUMP;
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
		*attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
		    XAT0_AV_QUARANTINED;
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
		*attrs |= (xoap->xoa_av_modified == 0) ? 0 :
		    XAT0_AV_MODIFIED;
	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));

		bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
	} else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
		/*
		 * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
		 * at the same time, so we can share the same space.
		 */
		bcopy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t));
	}
	if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
		*attrs |= (xoap->xoa_reparse == 0) ? 0 :
		    XAT0_REPARSE;
	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
		*attrs |= (xoap->xoa_offline == 0) ? 0 :
		    XAT0_OFFLINE;
	if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
		*attrs |= (xoap->xoa_sparse == 0) ? 0 :
		    XAT0_SPARSE;
	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
		*attrs |= (xoap->xoa_projinherit == 0) ? 0 :
		    XAT0_PROJINHERIT;
}

static void *
zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start)
{
	zfs_fuid_t *zfuid;
	uint64_t *fuidloc = start;

	/* First copy in the ACE FUIDs */
	for (zfuid = list_head(&fuidp->z_fuids); zfuid;
	    zfuid = list_next(&fuidp->z_fuids, zfuid)) {
		*fuidloc++ = zfuid->z_logfuid;
	}
	return (fuidloc);
}


static void *
zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
{
	zfs_fuid_domain_t *zdomain;

	/* now copy in the domain info, if any */
	if (fuidp->z_domain_str_sz != 0) {
		for (zdomain = list_head(&fuidp->z_domains); zdomain;
		    zdomain = list_next(&fuidp->z_domains, zdomain)) {
			bcopy((void *)zdomain->z_domain, start,
			    strlen(zdomain->z_domain) + 1);
			start = (caddr_t)start +
			    strlen(zdomain->z_domain) + 1;
		}
	}
	return (start);
}

/*
 * If zp is an xattr node, check whether the xattr owner is unlinked.
 * We don't want to log anything if the owner is unlinked.
 */
static int
zfs_xattr_owner_unlinked(znode_t *zp)
{
	int unlinked = 0;
	znode_t *dzp;
#ifdef __FreeBSD__
	znode_t *tzp = zp;

	/*
	 * zrele drops the vnode lock which violates the VOP locking contract
	 * on FreeBSD. See comment at the top of zfs_replay.c for more detail.
	 */
	/*
	 * if zp is XATTR node, keep walking up via z_xattr_parent until we
	 * get the owner
	 */
	while (tzp->z_pflags & ZFS_XATTR) {
		ASSERT3U(zp->z_xattr_parent, !=, 0);
		if (zfs_zget(ZTOZSB(tzp), tzp->z_xattr_parent, &dzp) != 0) {
			unlinked = 1;
			break;
		}

		if (tzp != zp)
			zrele(tzp);
		tzp = dzp;
		unlinked = tzp->z_unlinked;
	}
	if (tzp != zp)
		zrele(tzp);
#else
	zhold(zp);
	/*
	 * if zp is XATTR node, keep walking up via z_xattr_parent until we
	 * get the owner
	 */
	while (zp->z_pflags & ZFS_XATTR) {
		ASSERT3U(zp->z_xattr_parent, !=, 0);
		if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
			unlinked = 1;
			break;
		}

		zrele(zp);
		zp = dzp;
		unlinked = zp->z_unlinked;
	}
	zrele(zp);
#endif
	return (unlinked);
}

/*
 * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
 * TX_MKXATTR transactions.
 *
 * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
 * domain information appended prior to the name.  In this case the
 * uid/gid in the log record will be a log centric FUID.
 *
 * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
 * may contain attributes, ACL and optional fuid information.
 *
 * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
 * an ACL and normal users/groups in the ACEs.
 *
 * There may be optional xvattr attribute information similar
 * to zfs_log_setattr.
 *
 * Also, after the file name, "domain" strings may be appended.
 */
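/*
 * Rough layout of a create record as assembled below (a sketch, not
 * authoritative; the sizes come from the txsize computation in
 * zfs_log_create()):
 *
 *	lr_create_t (or lr_acl_create_t for the *_ACL* variants)
 *	[ xvattr data, if ATTR_XVATTR is set in va_mask ]
 *	[ ACEs, padded to ZIL_ACE_LENGTH(), if vsecp is passed ]
 *	[ FUID ids and domain strings, if fuidp is passed ]
 *	file name (NUL terminated)
 */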
void
zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
    zfs_fuid_info_t *fuidp, vattr_t *vap)
{
	itx_t *itx;
	lr_create_t *lr;
	lr_acl_create_t *lracl;
	size_t aclsize = 0;
	size_t xvatsize = 0;
	size_t txsize;
	xvattr_t *xvap = (xvattr_t *)vap;
	void *end;
	size_t lrsize;
	size_t namesize = strlen(name) + 1;
	size_t fuidsz = 0;

	if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
		return;

	/*
	 * If we have FUIDs present then add in space for
	 * domains and ACE fuid's if any.
	 */
	if (fuidp) {
		fuidsz += fuidp->z_domain_str_sz;
		fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
	}

	if (vap->va_mask & ATTR_XVATTR)
		xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);

	if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
	    (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
	    (int)txtype == TX_MKXATTR) {
		txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
		lrsize = sizeof (*lr);
	} else {
		txsize =
		    sizeof (lr_acl_create_t) + namesize + fuidsz +
		    ZIL_ACE_LENGTH(aclsize) + xvatsize;
		lrsize = sizeof (lr_acl_create_t);
	}

	itx = zil_itx_create(txtype, txsize);

	lr = (lr_create_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	lr->lr_foid = zp->z_id;
	/* Store dnode slot count in 8 bits above object id. */
	LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
	lr->lr_mode = zp->z_mode;
	if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOUID(zp)))) {
		lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOUID(zp));
	} else {
		lr->lr_uid = fuidp->z_fuid_owner;
	}
	if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOGID(zp)))) {
		lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOGID(zp));
	} else {
		lr->lr_gid = fuidp->z_fuid_group;
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
	    sizeof (uint64_t));
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
	    lr->lr_crtime, sizeof (uint64_t) * 2);

	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
	    sizeof (lr->lr_rdev)) != 0)
		lr->lr_rdev = 0;

	/*
	 * Fill in xvattr info if any
	 */
	if (vap->va_mask & ATTR_XVATTR) {
		zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
		end = (caddr_t)lr + lrsize + xvatsize;
	} else {
		end = (caddr_t)lr + lrsize;
	}

	/* Now fill in any ACL info */

	if (vsecp) {
		lracl = (lr_acl_create_t *)&itx->itx_lr;
		lracl->lr_aclcnt = vsecp->vsa_aclcnt;
		lracl->lr_acl_bytes = aclsize;
		lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
		lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
		if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS)
			lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
		else
			lracl->lr_acl_flags = 0;

		bcopy(vsecp->vsa_aclentp, end, aclsize);
		end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
	}

	/* drop in FUID info */
	if (fuidp) {
		end = zfs_log_fuid_ids(fuidp, end);
		end = zfs_log_fuid_domains(fuidp, end);
	}
	/*
	 * Now place file name in log record
	 */
	bcopy(name, end, namesize);

	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles both TX_REMOVE and TX_RMDIR transactions.
 */
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked)
{
	itx_t *itx;
	lr_remove_t *lr;
	size_t namesize = strlen(name) + 1;

	if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
		return;

	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
	lr = (lr_remove_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	bcopy(name, (char *)(lr + 1), namesize);

	itx->itx_oid = foid;

	/*
	 * Object ids can be re-instantiated in the next txg so
	 * remove any async transactions to avoid future leaks.
	 * This can happen if a fsync occurs on the re-instantiated
	 * object for a WR_INDIRECT or WR_NEED_COPY write, which gets
	 * the new file data and flushes a write record for the old object.
	 */
	if (unlinked) {
		ASSERT((txtype & ~TX_CI) == TX_REMOVE);
		zil_remove_async(zilog, foid);
	}
	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_LINK transactions.
 */
void
zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name)
{
	itx_t *itx;
	lr_link_t *lr;
	size_t namesize = strlen(name) + 1;

	if (zil_replaying(zilog, tx))
		return;

	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
	lr = (lr_link_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	lr->lr_link_obj = zp->z_id;
	bcopy(name, (char *)(lr + 1), namesize);

	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_SYMLINK transactions.
 */
void
zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, char *link)
{
	itx_t *itx;
	lr_create_t *lr;
	size_t namesize = strlen(name) + 1;
	size_t linksize = strlen(link) + 1;

	if (zil_replaying(zilog, tx))
		return;

	itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
	lr = (lr_create_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	lr->lr_foid = zp->z_id;
	lr->lr_uid = KUID_TO_SUID(ZTOUID(zp));
	lr->lr_gid = KGID_TO_SGID(ZTOGID(zp));
	lr->lr_mode = zp->z_mode;
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
	    sizeof (uint64_t));
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
	    lr->lr_crtime, sizeof (uint64_t) * 2);
	bcopy(name, (char *)(lr + 1), namesize);
	bcopy(link, (char *)(lr + 1) + namesize, linksize);

	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_RENAME transactions.
 */
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
{
	itx_t *itx;
	lr_rename_t *lr;
	size_t snamesize = strlen(sname) + 1;
	size_t dnamesize = strlen(dname) + 1;

	if (zil_replaying(zilog, tx))
		return;

	itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
	lr = (lr_rename_t *)&itx->itx_lr;
	lr->lr_sdoid = sdzp->z_id;
	lr->lr_tdoid = tdzp->z_id;
	bcopy(sname, (char *)(lr + 1), snamesize);
	bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
	itx->itx_oid = szp->z_id;

	zil_itx_assign(zilog, itx, tx);
}

/*
 * zfs_log_write() handles TX_WRITE transactions. The specified callback is
 * called as soon as the write is on stable storage (be it via a DMU sync or a
 * ZIL commit).
 */
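/*
 * Informal summary of how the itx write state is chosen below (see the
 * code for the authoritative rules):
 *
 *	WR_INDIRECT	logbias=throughput, or no slog and the residual is at
 *			least zfs_immediate_write_sz; the data is written via
 *			dmu_sync() and the log record points at the block.
 *	WR_COPIED	synchronous (O_SYNC/O_DSYNC) writes small enough to
 *			fit in one log block; the data is copied into the itx.
 *	WR_NEED_COPY	everything else; the data is copied at commit time.
 */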
long zfs_immediate_write_sz = 32768;

void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag,
    zil_callback_t callback, void *callback_data)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
	uint32_t blocksize = zp->z_blksz;
	itx_wr_state_t write_state;
	uintptr_t fsync_cnt;

	if (zil_replaying(zilog, tx) || zp->z_unlinked ||
	    zfs_xattr_owner_unlinked(zp)) {
		if (callback != NULL)
			callback(callback_data);
		return;
	}

	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		write_state = WR_INDIRECT;
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= zfs_immediate_write_sz)
		write_state = WR_INDIRECT;
	else if (ioflag & (O_SYNC | O_DSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		itx_wr_state_t wr_state = write_state;
		ssize_t len = resid;

		/*
		 * A WR_COPIED record must fit entirely in one log block.
		 * Large writes can use WR_NEED_COPY, which the ZIL will
		 * split into multiple records across several log blocks
		 * if necessary.
		 */
		if (wr_state == WR_COPIED &&
		    resid > zil_max_copied_data(zilog))
			wr_state = WR_NEED_COPY;
		else if (wr_state == WR_INDIRECT)
			len = MIN(blocksize - P2PHASE(off, blocksize), resid);

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (wr_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;

		DB_DNODE_ENTER(db);
		if (wr_state == WR_COPIED && dmu_read_by_dnode(DB_DNODE(db),
		    off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			wr_state = WR_NEED_COPY;
		}
		DB_DNODE_EXIT(db);

		itx->itx_wr_state = wr_state;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = ZTOZSB(zp);

		if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
		    (fsync_cnt == 0))
			itx->itx_sync = B_FALSE;

		itx->itx_callback = callback;
		itx->itx_callback_data = callback_data;
		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}

/*
 * Handles TX_TRUNCATE transactions.
 */
void
zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, uint64_t off, uint64_t len)
{
	itx_t *itx;
	lr_truncate_t *lr;

	if (zil_replaying(zilog, tx) || zp->z_unlinked ||
	    zfs_xattr_owner_unlinked(zp))
		return;

	itx = zil_itx_create(txtype, sizeof (*lr));
	lr = (lr_truncate_t *)&itx->itx_lr;
	lr->lr_foid = zp->z_id;
	lr->lr_offset = off;
	lr->lr_length = len;

	itx->itx_sync = (zp->z_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_SETATTR transactions.
 */
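/*
 * Sketch of the setattr record assembled below (informal; the size is
 * computed in zfs_log_setattr() via recsize):
 *
 *	lr_setattr_t
 *	[ lr_attr_t + xvattr bitmap, create time and attribute bits,
 *	  if ATTR_XVATTR is set in va_mask ]
 *	[ FUID domain strings, if fuidp is passed ]
 */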
void
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
{
	itx_t *itx;
	lr_setattr_t *lr;
	xvattr_t *xvap = (xvattr_t *)vap;
	size_t recsize = sizeof (lr_setattr_t);
	void *start;

	if (zil_replaying(zilog, tx) || zp->z_unlinked)
		return;

	/*
	 * If XVATTR is set, then the log record size needs to allow
	 * for lr_attr_t + the xvattr mask, mapsize and create time,
	 * plus the actual attribute values.
	 */
	if (vap->va_mask & ATTR_XVATTR)
		recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);

	if (fuidp)
		recsize += fuidp->z_domain_str_sz;

	itx = zil_itx_create(txtype, recsize);
	lr = (lr_setattr_t *)&itx->itx_lr;
	lr->lr_foid = zp->z_id;
	lr->lr_mask = (uint64_t)mask_applied;
	lr->lr_mode = (uint64_t)vap->va_mode;
	if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(vap->va_uid))
		lr->lr_uid = fuidp->z_fuid_owner;
	else
		lr->lr_uid = (uint64_t)vap->va_uid;

	if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(vap->va_gid))
		lr->lr_gid = fuidp->z_fuid_group;
	else
		lr->lr_gid = (uint64_t)vap->va_gid;

	lr->lr_size = (uint64_t)vap->va_size;
	ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
	ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
	start = (lr_setattr_t *)(lr + 1);
	if (vap->va_mask & ATTR_XVATTR) {
		zfs_log_xvattr((lr_attr_t *)start, xvap);
		start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
	}

	/*
	 * Now stick on domain information if any on end
	 */

	if (fuidp)
		(void) zfs_log_fuid_domains(fuidp, start);

	itx->itx_sync = (zp->z_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_ACL transactions.
 */
void
zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
    vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
{
	itx_t *itx;
	lr_acl_v0_t *lrv0;
	lr_acl_t *lr;
	int txtype;
	int lrsize;
	size_t txsize;
	size_t aclbytes = vsecp->vsa_aclentsz;

	if (zil_replaying(zilog, tx) || zp->z_unlinked)
		return;

	txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ?
	    TX_ACL_V0 : TX_ACL;

	if (txtype == TX_ACL)
		lrsize = sizeof (*lr);
	else
		lrsize = sizeof (*lrv0);

	txsize = lrsize +
	    ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
	    (fuidp ? fuidp->z_domain_str_sz : 0) +
	    sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0);

	itx = zil_itx_create(txtype, txsize);

	lr = (lr_acl_t *)&itx->itx_lr;
	lr->lr_foid = zp->z_id;
	if (txtype == TX_ACL) {
		lr->lr_acl_bytes = aclbytes;
		lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
		lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
		if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
			lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
		else
			lr->lr_acl_flags = 0;
	}
	lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt;

	if (txtype == TX_ACL_V0) {
		lrv0 = (lr_acl_v0_t *)lr;
		bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
	} else {
		void *start = (ace_t *)(lr + 1);

		bcopy(vsecp->vsa_aclentp, start, aclbytes);

		start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);

		if (fuidp) {
			start = zfs_log_fuid_ids(fuidp, start);
			(void) zfs_log_fuid_domains(fuidp, start);
		}
	}

	itx->itx_sync = (zp->z_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, LONG, ZMOD_RW,
	"Largest data block to write to zil");
/* END CSTYLED */