xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_log.c (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/cmn_err.h>
33 #include <sys/kmem.h>
34 #include <sys/thread.h>
35 #include <sys/file.h>
36 #include <sys/vfs.h>
37 #include <sys/zfs_znode.h>
38 #include <sys/zfs_dir.h>
39 #include <sys/zil.h>
40 #include <sys/byteorder.h>
41 #include <sys/policy.h>
42 #include <sys/stat.h>
43 #include <sys/mode.h>
44 #include <sys/acl.h>
45 #include <sys/dmu.h>
46 #include <sys/spa.h>
47 #include <sys/ddi.h>
48 
/*
 * All the functions in this file are used to construct the log entries
 * that record transactions. Each one allocates an intent log transaction
 * structure (itx_t) and saves within it all the information necessary to
 * possibly replay the transaction. The itx is then assigned a sequence
 * number and inserted in the in-memory list anchored in the zilog.
 */
56 
57 /*
58  * zfs_log_create() is used to handle TX_CREATE, TX_MKDIR and TX_MKXATTR
59  * transactions.
60  */
61 uint64_t
62 zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype,
63 	znode_t *dzp, znode_t *zp, char *name)
64 {
65 	itx_t *itx;
66 	uint64_t seq;
67 	lr_create_t *lr;
68 	size_t namesize = strlen(name) + 1;
69 
70 	if (zilog == NULL)
71 		return (0);
72 
73 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
74 	lr = (lr_create_t *)&itx->itx_lr;
75 	lr->lr_doid = dzp->z_id;
76 	lr->lr_foid = zp->z_id;
77 	lr->lr_mode = zp->z_phys->zp_mode;
78 	lr->lr_uid = zp->z_phys->zp_uid;
79 	lr->lr_gid = zp->z_phys->zp_gid;
80 	lr->lr_gen = zp->z_phys->zp_gen;
81 	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
82 	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
83 	lr->lr_rdev = zp->z_phys->zp_rdev;
84 	bcopy(name, (char *)(lr + 1), namesize);
85 
86 	seq = zil_itx_assign(zilog, itx, tx);
87 	dzp->z_last_itx = seq;
88 	zp->z_last_itx = seq;
89 	return (seq);
90 }
91 
92 /*
93  * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions.
94  */
95 uint64_t
96 zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype,
97 	znode_t *dzp, char *name)
98 {
99 	itx_t *itx;
100 	uint64_t seq;
101 	lr_remove_t *lr;
102 	size_t namesize = strlen(name) + 1;
103 
104 	if (zilog == NULL)
105 		return (0);
106 
107 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
108 	lr = (lr_remove_t *)&itx->itx_lr;
109 	lr->lr_doid = dzp->z_id;
110 	bcopy(name, (char *)(lr + 1), namesize);
111 
112 	seq = zil_itx_assign(zilog, itx, tx);
113 	dzp->z_last_itx = seq;
114 	return (seq);
115 }
116 
117 /*
118  * zfs_log_link() handles TX_LINK transactions.
119  */
120 uint64_t
121 zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype,
122 	znode_t *dzp, znode_t *zp, char *name)
123 {
124 	itx_t *itx;
125 	uint64_t seq;
126 	lr_link_t *lr;
127 	size_t namesize = strlen(name) + 1;
128 
129 	if (zilog == NULL)
130 		return (0);
131 
132 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
133 	lr = (lr_link_t *)&itx->itx_lr;
134 	lr->lr_doid = dzp->z_id;
135 	lr->lr_link_obj = zp->z_id;
136 	bcopy(name, (char *)(lr + 1), namesize);
137 
138 	seq = zil_itx_assign(zilog, itx, tx);
139 	dzp->z_last_itx = seq;
140 	zp->z_last_itx = seq;
141 	return (seq);
142 }
143 
144 /*
145  * zfs_log_symlink() handles TX_SYMLINK transactions.
146  */
147 uint64_t
148 zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype,
149 	znode_t *dzp, znode_t *zp, char *name, char *link)
150 {
151 	itx_t *itx;
152 	uint64_t seq;
153 	lr_create_t *lr;
154 	size_t namesize = strlen(name) + 1;
155 	size_t linksize = strlen(link) + 1;
156 
157 	if (zilog == NULL)
158 		return (0);
159 
160 	itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
161 	lr = (lr_create_t *)&itx->itx_lr;
162 	lr->lr_doid = dzp->z_id;
163 	lr->lr_foid = zp->z_id;
164 	lr->lr_mode = zp->z_phys->zp_mode;
165 	lr->lr_uid = zp->z_phys->zp_uid;
166 	lr->lr_gid = zp->z_phys->zp_gid;
167 	lr->lr_gen = zp->z_phys->zp_gen;
168 	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
169 	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
170 	bcopy(name, (char *)(lr + 1), namesize);
171 	bcopy(link, (char *)(lr + 1) + namesize, linksize);
172 
173 	seq = zil_itx_assign(zilog, itx, tx);
174 	dzp->z_last_itx = seq;
175 	zp->z_last_itx = seq;
176 	return (seq);
177 }
178 
179 /*
180  * zfs_log_rename() handles TX_RENAME transactions.
181  */
182 uint64_t
183 zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype,
184 	znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
185 {
186 	itx_t *itx;
187 	uint64_t seq;
188 	lr_rename_t *lr;
189 	size_t snamesize = strlen(sname) + 1;
190 	size_t dnamesize = strlen(dname) + 1;
191 
192 	if (zilog == NULL)
193 		return (0);
194 
195 	itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
196 	lr = (lr_rename_t *)&itx->itx_lr;
197 	lr->lr_sdoid = sdzp->z_id;
198 	lr->lr_tdoid = tdzp->z_id;
199 	bcopy(sname, (char *)(lr + 1), snamesize);
200 	bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
201 
202 	seq = zil_itx_assign(zilog, itx, tx);
203 	sdzp->z_last_itx = seq;
204 	tdzp->z_last_itx = seq;
205 	szp->z_last_itx = seq;
206 	return (seq);
207 }
208 
209 /*
210  * zfs_log_write() handles TX_WRITE transactions.
211  */
212 ssize_t zfs_immediate_write_sz = 32768;
213 
214 uint64_t
215 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
216 	znode_t *zp, offset_t off, ssize_t len, int ioflag, uio_t *uio)
217 {
218 	itx_t *itx;
219 	uint64_t seq;
220 	lr_write_t *lr;
221 	itx_wr_state_t write_state;
222 	size_t dlen;
223 	int err;
224 
225 	if (zilog == NULL || zp->z_reap)
226 		return (0);
227 
228 	/*
229 	 * Writes are handled in three different ways:
230 	 *
231 	 * WR_INDIRECT:
232 	 *    If the write is greater than zfs_immediate_write_sz then
233 	 *    later *if* we need to log the write then dmu_sync() is used
234 	 *    to immediately write the block and it's block pointer is put
235 	 *    in the log record.
236 	 * WR_COPIED:
237 	 *    If we know we'll immediately be committing the
238 	 *    transaction (FDSYNC (O_DSYNC)), the we allocate a larger
239 	 *    log record here for the data and copy the data in.
240 	 * WR_NEED_COPY:
241 	 *    Otherwise we don't allocate a buffer, and *if* we need to
242 	 *    flush the write later then a buffer is allocated and
243 	 *    we retrieve the data using the dmu.
244 	 */
245 	if (len > zfs_immediate_write_sz) {
246 		dlen = 0;
247 		write_state = WR_INDIRECT;
248 	} else if (ioflag & FDSYNC) {
249 		dlen = len;
250 		write_state = WR_COPIED;
251 	} else {
252 		dlen = 0;
253 		write_state = WR_NEED_COPY;
254 	}
255 	itx = zil_itx_create(txtype, sizeof (*lr) + dlen);
256 	if (write_state == WR_COPIED) {
257 		err = xcopyin(uio->uio_iov->iov_base - len,
258 		    (char *)itx + offsetof(itx_t, itx_lr) + sizeof (*lr), len);
259 		/*
260 		 * xcopyin shouldn't error as we've already successfully
261 		 * copied it to a dmu buffer. However if it does we'll get
262 		 * the data from the dmu later.
263 		 */
264 		if (err) {
265 			kmem_free(itx, offsetof(itx_t, itx_lr)
266 			    + itx->itx_lr.lrc_reclen);
267 			itx = zil_itx_create(txtype, sizeof (*lr));
268 			write_state = WR_NEED_COPY;
269 		}
270 	}
271 	itx->itx_wr_state = write_state;
272 	lr = (lr_write_t *)&itx->itx_lr;
273 	lr->lr_foid = zp->z_id;
274 	lr->lr_offset = off;
275 	lr->lr_length = len;
276 	lr->lr_blkoff = 0;
277 	BP_ZERO(&lr->lr_blkptr);
278 
279 	itx->itx_private = zp->z_zfsvfs;
280 
281 	seq = zil_itx_assign(zilog, itx, tx);
282 	zp->z_last_itx = seq;
283 	return (seq);
284 }
285 
286 /*
287  * zfs_log_truncate() handles TX_TRUNCATE transactions.
288  */
289 uint64_t
290 zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
291 	znode_t *zp, uint64_t off, uint64_t len)
292 {
293 	itx_t *itx;
294 	uint64_t seq;
295 	lr_truncate_t *lr;
296 
297 	if (zilog == NULL || zp->z_reap)
298 		return (0);
299 
300 	itx = zil_itx_create(txtype, sizeof (*lr));
301 	lr = (lr_truncate_t *)&itx->itx_lr;
302 	lr->lr_foid = zp->z_id;
303 	lr->lr_offset = off;
304 	lr->lr_length = len;
305 
306 	seq = zil_itx_assign(zilog, itx, tx);
307 	zp->z_last_itx = seq;
308 	return (seq);
309 }
310 
311 /*
312  * zfs_log_setattr() handles TX_SETATTR transactions.
313  */
314 uint64_t
315 zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
316 	znode_t *zp, vattr_t *vap, uint_t mask_applied)
317 {
318 	itx_t *itx;
319 	uint64_t seq;
320 	lr_setattr_t *lr;
321 
322 	if (zilog == NULL || zp->z_reap)
323 		return (0);
324 
325 	itx = zil_itx_create(txtype, sizeof (*lr));
326 	lr = (lr_setattr_t *)&itx->itx_lr;
327 	lr->lr_foid = zp->z_id;
328 	lr->lr_mask = (uint64_t)mask_applied;
329 	lr->lr_mode = (uint64_t)vap->va_mode;
330 	lr->lr_uid = (uint64_t)vap->va_uid;
331 	lr->lr_gid = (uint64_t)vap->va_gid;
332 	lr->lr_size = (uint64_t)vap->va_size;
333 	ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
334 	ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
335 
336 	seq = zil_itx_assign(zilog, itx, tx);
337 	zp->z_last_itx = seq;
338 	return (seq);
339 }
340 
341 /*
342  * zfs_log_acl() handles TX_ACL transactions.
343  */
344 uint64_t
345 zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype,
346 	znode_t *zp, int aclcnt, ace_t *z_ace)
347 {
348 	itx_t *itx;
349 	uint64_t seq;
350 	lr_acl_t *lr;
351 
352 	if (zilog == NULL || zp->z_reap)
353 		return (0);
354 
355 	itx = zil_itx_create(txtype, sizeof (*lr) + aclcnt * sizeof (ace_t));
356 	lr = (lr_acl_t *)&itx->itx_lr;
357 	lr->lr_foid = zp->z_id;
358 	lr->lr_aclcnt = (uint64_t)aclcnt;
359 	bcopy(z_ace, (ace_t *)(lr + 1), aclcnt * sizeof (ace_t));
360 
361 	seq = zil_itx_assign(zilog, itx, tx);
362 	zp->z_last_itx = seq;
363 	return (seq);
364 }
365