1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #include <sys/thread.h> 35 #include <sys/file.h> 36 #include <sys/vfs.h> 37 #include <sys/zfs_znode.h> 38 #include <sys/zfs_dir.h> 39 #include <sys/zil.h> 40 #include <sys/byteorder.h> 41 #include <sys/policy.h> 42 #include <sys/stat.h> 43 #include <sys/mode.h> 44 #include <sys/acl.h> 45 #include <sys/dmu.h> 46 #include <sys/spa.h> 47 #include <sys/ddi.h> 48 49 /* 50 * All the functions in this file are used to construct the log entries 51 * to record transactions. They allocate an intent log transaction 52 * structure (itx_t) and save within it all the information necessary to 53 * possibly replay the transaction. The itx is then assigned a sequence 54 * number and inserted in the in-memory list anchored in the zilog. 
55 */ 56 57 /* 58 * zfs_log_create() is used to handle TX_CREATE, TX_MKDIR and TX_MKXATTR 59 * transactions. 60 */ 61 void 62 zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype, 63 znode_t *dzp, znode_t *zp, char *name) 64 { 65 itx_t *itx; 66 uint64_t seq; 67 lr_create_t *lr; 68 size_t namesize = strlen(name) + 1; 69 70 if (zilog == NULL) 71 return; 72 73 itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 74 lr = (lr_create_t *)&itx->itx_lr; 75 lr->lr_doid = dzp->z_id; 76 lr->lr_foid = zp->z_id; 77 lr->lr_mode = zp->z_phys->zp_mode; 78 lr->lr_uid = zp->z_phys->zp_uid; 79 lr->lr_gid = zp->z_phys->zp_gid; 80 lr->lr_gen = zp->z_phys->zp_gen; 81 lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 82 lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 83 lr->lr_rdev = zp->z_phys->zp_rdev; 84 bcopy(name, (char *)(lr + 1), namesize); 85 86 seq = zil_itx_assign(zilog, itx, tx); 87 dzp->z_last_itx = seq; 88 zp->z_last_itx = seq; 89 } 90 91 /* 92 * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. 93 */ 94 void 95 zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype, 96 znode_t *dzp, char *name) 97 { 98 itx_t *itx; 99 uint64_t seq; 100 lr_remove_t *lr; 101 size_t namesize = strlen(name) + 1; 102 103 if (zilog == NULL) 104 return; 105 106 itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 107 lr = (lr_remove_t *)&itx->itx_lr; 108 lr->lr_doid = dzp->z_id; 109 bcopy(name, (char *)(lr + 1), namesize); 110 111 seq = zil_itx_assign(zilog, itx, tx); 112 dzp->z_last_itx = seq; 113 } 114 115 /* 116 * zfs_log_link() handles TX_LINK transactions. 
117 */ 118 void 119 zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype, 120 znode_t *dzp, znode_t *zp, char *name) 121 { 122 itx_t *itx; 123 uint64_t seq; 124 lr_link_t *lr; 125 size_t namesize = strlen(name) + 1; 126 127 if (zilog == NULL) 128 return; 129 130 itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 131 lr = (lr_link_t *)&itx->itx_lr; 132 lr->lr_doid = dzp->z_id; 133 lr->lr_link_obj = zp->z_id; 134 bcopy(name, (char *)(lr + 1), namesize); 135 136 seq = zil_itx_assign(zilog, itx, tx); 137 dzp->z_last_itx = seq; 138 zp->z_last_itx = seq; 139 } 140 141 /* 142 * zfs_log_symlink() handles TX_SYMLINK transactions. 143 */ 144 void 145 zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype, 146 znode_t *dzp, znode_t *zp, char *name, char *link) 147 { 148 itx_t *itx; 149 uint64_t seq; 150 lr_create_t *lr; 151 size_t namesize = strlen(name) + 1; 152 size_t linksize = strlen(link) + 1; 153 154 if (zilog == NULL) 155 return; 156 157 itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); 158 lr = (lr_create_t *)&itx->itx_lr; 159 lr->lr_doid = dzp->z_id; 160 lr->lr_foid = zp->z_id; 161 lr->lr_mode = zp->z_phys->zp_mode; 162 lr->lr_uid = zp->z_phys->zp_uid; 163 lr->lr_gid = zp->z_phys->zp_gid; 164 lr->lr_gen = zp->z_phys->zp_gen; 165 lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 166 lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 167 bcopy(name, (char *)(lr + 1), namesize); 168 bcopy(link, (char *)(lr + 1) + namesize, linksize); 169 170 seq = zil_itx_assign(zilog, itx, tx); 171 dzp->z_last_itx = seq; 172 zp->z_last_itx = seq; 173 } 174 175 /* 176 * zfs_log_rename() handles TX_RENAME transactions. 
177 */ 178 void 179 zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype, 180 znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) 181 { 182 itx_t *itx; 183 uint64_t seq; 184 lr_rename_t *lr; 185 size_t snamesize = strlen(sname) + 1; 186 size_t dnamesize = strlen(dname) + 1; 187 188 if (zilog == NULL) 189 return; 190 191 itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); 192 lr = (lr_rename_t *)&itx->itx_lr; 193 lr->lr_sdoid = sdzp->z_id; 194 lr->lr_tdoid = tdzp->z_id; 195 bcopy(sname, (char *)(lr + 1), snamesize); 196 bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); 197 198 seq = zil_itx_assign(zilog, itx, tx); 199 sdzp->z_last_itx = seq; 200 tdzp->z_last_itx = seq; 201 szp->z_last_itx = seq; 202 } 203 204 /* 205 * zfs_log_write() handles TX_WRITE transactions. 206 */ 207 ssize_t zfs_immediate_write_sz = 32768; 208 209 void 210 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, 211 znode_t *zp, offset_t off, ssize_t len, int ioflag, uio_t *uio) 212 { 213 itx_t *itx; 214 uint64_t seq; 215 lr_write_t *lr; 216 itx_wr_state_t write_state; 217 size_t dlen; 218 int err; 219 220 if (zilog == NULL || zp->z_unlinked) 221 return; 222 223 /* 224 * Writes are handled in three different ways: 225 * 226 * WR_INDIRECT: 227 * If the write is greater than zfs_immediate_write_sz then 228 * later *if* we need to log the write then dmu_sync() is used 229 * to immediately write the block and it's block pointer is put 230 * in the log record. 231 * WR_COPIED: 232 * If we know we'll immediately be committing the 233 * transaction (FDSYNC (O_DSYNC)), the we allocate a larger 234 * log record here for the data and copy the data in. 235 * WR_NEED_COPY: 236 * Otherwise we don't allocate a buffer, and *if* we need to 237 * flush the write later then a buffer is allocated and 238 * we retrieve the data using the dmu. 
239 */ 240 if (len > zfs_immediate_write_sz) { 241 dlen = 0; 242 write_state = WR_INDIRECT; 243 } else if (ioflag & FDSYNC) { 244 dlen = len; 245 write_state = WR_COPIED; 246 } else { 247 dlen = 0; 248 write_state = WR_NEED_COPY; 249 } 250 itx = zil_itx_create(txtype, sizeof (*lr) + dlen); 251 if (write_state == WR_COPIED) { 252 err = xcopyin(uio->uio_iov->iov_base - len, 253 (char *)itx + offsetof(itx_t, itx_lr) + sizeof (*lr), len); 254 /* 255 * xcopyin shouldn't error as we've already successfully 256 * copied it to a dmu buffer. However if it does we'll get 257 * the data from the dmu later. 258 */ 259 if (err) { 260 kmem_free(itx, offsetof(itx_t, itx_lr) 261 + itx->itx_lr.lrc_reclen); 262 itx = zil_itx_create(txtype, sizeof (*lr)); 263 write_state = WR_NEED_COPY; 264 } 265 } 266 itx->itx_wr_state = write_state; 267 lr = (lr_write_t *)&itx->itx_lr; 268 lr->lr_foid = zp->z_id; 269 lr->lr_offset = off; 270 lr->lr_length = len; 271 lr->lr_blkoff = 0; 272 BP_ZERO(&lr->lr_blkptr); 273 274 itx->itx_private = zp->z_zfsvfs; 275 276 itx->itx_sync = (zp->z_sync_cnt != 0); 277 seq = zil_itx_assign(zilog, itx, tx); 278 zp->z_last_itx = seq; 279 } 280 281 /* 282 * zfs_log_truncate() handles TX_TRUNCATE transactions. 283 */ 284 void 285 zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, 286 znode_t *zp, uint64_t off, uint64_t len) 287 { 288 itx_t *itx; 289 uint64_t seq; 290 lr_truncate_t *lr; 291 292 if (zilog == NULL || zp->z_unlinked) 293 return; 294 295 itx = zil_itx_create(txtype, sizeof (*lr)); 296 lr = (lr_truncate_t *)&itx->itx_lr; 297 lr->lr_foid = zp->z_id; 298 lr->lr_offset = off; 299 lr->lr_length = len; 300 301 itx->itx_sync = (zp->z_sync_cnt != 0); 302 seq = zil_itx_assign(zilog, itx, tx); 303 zp->z_last_itx = seq; 304 } 305 306 /* 307 * zfs_log_setattr() handles TX_SETATTR transactions. 
308 */ 309 void 310 zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, 311 znode_t *zp, vattr_t *vap, uint_t mask_applied) 312 { 313 itx_t *itx; 314 uint64_t seq; 315 lr_setattr_t *lr; 316 317 if (zilog == NULL || zp->z_unlinked) 318 return; 319 320 itx = zil_itx_create(txtype, sizeof (*lr)); 321 lr = (lr_setattr_t *)&itx->itx_lr; 322 lr->lr_foid = zp->z_id; 323 lr->lr_mask = (uint64_t)mask_applied; 324 lr->lr_mode = (uint64_t)vap->va_mode; 325 lr->lr_uid = (uint64_t)vap->va_uid; 326 lr->lr_gid = (uint64_t)vap->va_gid; 327 lr->lr_size = (uint64_t)vap->va_size; 328 ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); 329 ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); 330 331 itx->itx_sync = (zp->z_sync_cnt != 0); 332 seq = zil_itx_assign(zilog, itx, tx); 333 zp->z_last_itx = seq; 334 } 335 336 /* 337 * zfs_log_acl() handles TX_ACL transactions. 338 */ 339 void 340 zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype, 341 znode_t *zp, int aclcnt, ace_t *z_ace) 342 { 343 itx_t *itx; 344 uint64_t seq; 345 lr_acl_t *lr; 346 347 if (zilog == NULL || zp->z_unlinked) 348 return; 349 350 itx = zil_itx_create(txtype, sizeof (*lr) + aclcnt * sizeof (ace_t)); 351 lr = (lr_acl_t *)&itx->itx_lr; 352 lr->lr_foid = zp->z_id; 353 lr->lr_aclcnt = (uint64_t)aclcnt; 354 bcopy(z_ace, (ace_t *)(lr + 1), aclcnt * sizeof (ace_t)); 355 356 itx->itx_sync = (zp->z_sync_cnt != 0); 357 seq = zil_itx_assign(zilog, itx, tx); 358 zp->z_last_itx = seq; 359 } 360