xref: /titanic_51/usr/src/uts/common/fs/ufs/lufs_top.c (revision c10c16dec587a0662068f6e2991c29ed3a9db943)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/systm.h>
29 #include <sys/types.h>
30 #include <sys/vnode.h>
31 #include <sys/errno.h>
32 #include <sys/sysmacros.h>
33 #include <sys/debug.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/proc.h>
37 #include <sys/taskq.h>
38 #include <sys/cmn_err.h>
39 #include <sys/fs/ufs_inode.h>
40 #include <sys/fs/ufs_filio.h>
41 #include <sys/fs/ufs_log.h>
42 #include <sys/fs/ufs_bio.h>
43 
44 /*
45  * FILE SYSTEM INTERFACE TO TRANSACTION OPERATIONS (TOP; like VOP)
46  */
47 
48 uint_t topkey; /* tsd transaction key */
49 
50 /*
51  * declare a delta
52  */
53 void
54 top_delta(
55 	ufsvfs_t *ufsvfsp,
56 	offset_t mof,
57 	off_t nb,
58 	delta_t dtyp,
59 	int (*func)(),
60 	ulong_t arg)
61 {
62 	ml_unit_t		*ul	= ufsvfsp->vfs_log;
63 	threadtrans_t		*tp	= tsd_get(topkey);
64 
65 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
66 	ASSERT(nb);
67 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
68 	    top_delta_debug(ul, mof, nb, dtyp));
69 
70 	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);
71 
72 	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
73 	if (tp) {
74 		tp->any_deltas = 1;
75 	}
76 }
77 
78 /*
79  * cancel a delta
80  */
81 void
82 top_cancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb, int flags)
83 {
84 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
85 	int		metadata = flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA);
86 
87 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
88 	ASSERT(nb);
89 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
90 	    (!(flags & metadata) ||
91 	    top_delta_debug(ul, mof, nb, DT_CANCEL)));
92 
93 	if (metadata)
94 		deltamap_del(ul->un_deltamap, mof, nb);
95 
96 	logmap_cancel(ul, mof, nb, metadata);
97 
98 	/*
99 	 * needed for the roll thread's heuristic
100 	 */
101 	ul->un_logmap->mtm_ref = 1;
102 }
103 
104 /*
105  * check if this delta has been canceled (metadata -> userdata)
106  */
107 int
108 top_iscancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb)
109 {
110 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
111 
112 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
113 	ASSERT(nb);
114 	if (logmap_iscancel(ul->un_logmap, mof, nb))
115 		return (1);
116 	if (ul->un_flags & LDL_ERROR)
117 		return (1);
118 	return (0);
119 }
120 
121 /*
122  * put device into error state
123  */
124 void
125 top_seterror(ufsvfs_t *ufsvfsp)
126 {
127 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
128 
129 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
130 	ldl_seterror(ul, "ufs is forcing a ufs log error");
131 }
132 
133 /*
134  * issue a empty sync op to help empty the delta/log map or the log
135  */
136 static void
137 top_issue_sync(void *arg)
138 {
139 	ufsvfs_t *ufsvfsp = (ufsvfs_t *)arg;
140 	ml_unit_t *ul = (ml_unit_t *)ufsvfsp->vfs_log;
141 	mt_map_t *mtm = ul->un_logmap;
142 	int	error = 0;
143 
144 	if ((curthread->t_flag & T_DONTBLOCK) == 0)
145 		curthread->t_flag |= T_DONTBLOCK;
146 	top_begin_sync(ufsvfsp, TOP_COMMIT_ASYNC, 0, &error);
147 	if (!error) {
148 		top_end_sync(ufsvfsp, &error, TOP_COMMIT_ASYNC, 0);
149 	}
150 
151 	/*
152 	 * If we are a taskq thread, decrement mtm_taskq_sync_count and
153 	 * wake up the thread waiting on the mtm_cv if the mtm_taskq_sync_count
154 	 * hits zero.
155 	 */
156 
157 	if (taskq_member(system_taskq, curthread)) {
158 		mutex_enter(&mtm->mtm_lock);
159 		mtm->mtm_taskq_sync_count--;
160 		if (mtm->mtm_taskq_sync_count == 0) {
161 			cv_signal(&mtm->mtm_cv);
162 		}
163 		mutex_exit(&mtm->mtm_lock);
164 	}
165 }
166 
167 /*
168  * MOBY TRANSACTION ROUTINES
169  * begin a moby transaction
170  *	sync ops enter until first sync op finishes
171  *	async ops enter until last sync op finishes
172  * end a moby transaction
173  *		outstanding deltas are pushed thru log
174  *		log buffer is committed (incore only)
175  *		next trans is open to async ops
176  *		log buffer is committed on the log
177  *		next trans is open to sync ops
178  */
179 
180 /*ARGSUSED*/
181 void
182 top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
183 {
184 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
185 	mt_map_t	*mtm = ul->un_logmap;
186 	threadtrans_t	*tp;
187 	ushort_t	seq;
188 
189 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
190 	ASSERT(error != NULL);
191 	ASSERT(*error == 0);
192 
193 	mutex_enter(&mtm->mtm_lock);
194 	if (topid == TOP_FSYNC) {
195 		/*
196 		 * Error the fsync immediately if this is an nfs thread
197 		 * and its last transaction has already been committed.
198 		 * The only transactions outstanding are those
199 		 * where no commit has even started
200 		 * (last_async_tid == mtm->mtm_tid)
201 		 * or those where a commit is in progress
202 		 * (last_async_tid == mtm->mtm_committid)
203 		 */
204 		if (curthread->t_flag & T_DONTPEND) {
205 			tp = tsd_get(topkey);
206 			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
207 			    (tp->last_async_tid != mtm->mtm_committid)) {
208 				mutex_exit(&mtm->mtm_lock);
209 				*error = 1;
210 				return;
211 			}
212 		}
213 
214 		/*
215 		 * If there's already other synchronous transactions
216 		 * and we haven't allowed async ones to start yet
217 		 * then just wait for the commit to complete.
218 		 */
219 		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
220 		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
221 			seq = mtm->mtm_seq;
222 			do {
223 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
224 			} while (seq == mtm->mtm_seq);
225 			mutex_exit(&mtm->mtm_lock);
226 			*error = 1;
227 			return;
228 		}
229 		if (mtm->mtm_closed & TOP_SYNC) {
230 			/*
231 			 * We know we're in the window where a thread is
232 			 * committing a transaction in top_end_sync() and
233 			 * has allowed async threads to start but hasn't
234 			 * got the completion on the commit write to
235 			 * allow sync threads to start.
236 			 * So wait for that commit completion then retest
237 			 * for the quick nfs check and if that fails
238 			 * go on to start a transaction
239 			 */
240 			seq = mtm->mtm_seq;
241 			do {
242 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
243 			} while (seq == mtm->mtm_seq);
244 
245 			/* tp is set above if T_DONTPEND */
246 			if ((curthread->t_flag & T_DONTPEND) && tp &&
247 			    (tp->last_async_tid != mtm->mtm_tid) &&
248 			    (tp->last_async_tid != mtm->mtm_committid)) {
249 				mutex_exit(&mtm->mtm_lock);
250 				*error = 1;
251 				return;
252 			}
253 		}
254 	}
255 retry:
256 	mtm->mtm_ref = 1;
257 	/*
258 	 * current transaction closed to sync ops; try for next transaction
259 	 */
260 	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
261 		ulong_t		resv;
262 
263 		/*
264 		 * We know a commit is in progress, if we are trying to
265 		 * commit and we haven't allowed async ones to start yet,
266 		 * then just wait for the commit completion
267 		 */
268 		if ((size == TOP_COMMIT_SIZE) &&
269 		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
270 		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
271 			seq = mtm->mtm_seq;
272 			do {
273 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
274 			} while (seq == mtm->mtm_seq);
275 			mutex_exit(&mtm->mtm_lock);
276 			*error = 1;
277 			return;
278 		}
279 
280 		/*
281 		 * next transaction is full; try for next transaction
282 		 */
283 		resv = size + ul->un_resv_wantin + ul->un_resv;
284 		if (resv > ul->un_maxresv) {
285 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
286 			goto retry;
287 		}
288 		/*
289 		 * we are in the next transaction; wait for it to start
290 		 */
291 		mtm->mtm_wantin++;
292 		ul->un_resv_wantin += size;
293 		/*
294 		 * The corresponding cv_broadcast wakes up
295 		 * all threads that have been validated to go into
296 		 * the next transaction. However, because spurious
297 		 * cv_wait wakeups are possible we use a sequence
298 		 * number to check that the commit and cv_broadcast
299 		 * has really occurred. We couldn't use mtm_tid
300 		 * because on error that doesn't get incremented.
301 		 */
302 		seq = mtm->mtm_seq;
303 		do {
304 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
305 		} while (seq == mtm->mtm_seq);
306 	} else {
307 		/*
308 		 * if the current transaction is full; try the next one
309 		 */
310 		if (size && (ul->un_resv && ((size + ul->un_resv) >
311 		    ul->un_maxresv)) && !panicstr) {
312 			/*
313 			 * log is over reserved and no one will unresv the space
314 			 *	so generate empty sync op to unresv the space
315 			 */
316 			if (mtm->mtm_activesync == 0) {
317 				mutex_exit(&mtm->mtm_lock);
318 				top_issue_sync(ufsvfsp);
319 				mutex_enter(&mtm->mtm_lock);
320 				goto retry;
321 			}
322 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
323 			goto retry;
324 		}
325 		/*
326 		 * we are in the current transaction
327 		 */
328 		mtm->mtm_active++;
329 		mtm->mtm_activesync++;
330 		ul->un_resv += size;
331 	}
332 
333 	ASSERT(mtm->mtm_active > 0);
334 	ASSERT(mtm->mtm_activesync > 0);
335 	mutex_exit(&mtm->mtm_lock);
336 
337 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
338 	    top_begin_debug(ul, topid, size));
339 }
340 
341 int tryfail_cnt;
342 
343 int
344 top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
345 {
346 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
347 	mt_map_t	*mtm	= ul->un_logmap;
348 	threadtrans_t   *tp;
349 
350 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
351 
352 	tp = tsd_get(topkey);
353 	if (tp == NULL) {
354 		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
355 		(void) tsd_set(topkey, tp);
356 	}
357 	tp->deltas_size = 0;
358 	tp->any_deltas = 0;
359 
360 	mutex_enter(&mtm->mtm_lock);
361 retry:
362 	mtm->mtm_ref = 1;
363 	/*
364 	 * current transaction closed to async ops; try for next transaction
365 	 */
366 	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
367 		if (tryasync) {
368 			mutex_exit(&mtm->mtm_lock);
369 			tryfail_cnt++;
370 			return (EWOULDBLOCK);
371 		}
372 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
373 		goto retry;
374 	}
375 
376 	/*
377 	 * if the current transaction is full; try the next one
378 	 */
379 	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
380 	    !panicstr) {
381 		/*
382 		 * log is overreserved and no one will unresv the space
383 		 *	so generate empty sync op to unresv the space
384 		 * We need TOP_SYNC_FORCED because we want to know when
385 		 * a top_end_sync is completed.
386 		 * mtm_taskq_sync_count is needed because we want to keep track
387 		 * of the pending top_issue_sync dispatches so that during
388 		 * forced umount we can wait for these to complete.
389 		 * mtm_taskq_sync_count is decremented in top_issue_sync and
390 		 * can remain set even after top_end_sync completes.
391 		 * We have a window between the clearing of TOP_SYNC_FORCED
392 		 * flag and the decrementing of mtm_taskq_sync_count.
393 		 * If in this window new async transactions start consuming
394 		 * log space, the log can get overreserved.
395 		 * Subsequently a new async transaction would fail to generate
396 		 * an empty sync transaction via the taskq, since it finds
397 		 * the mtm_taskq_sync_count set. This can cause a hang.
398 		 * Hence we do not test for mtm_taskq_sync_count being zero.
399 		 * Instead, the TOP_SYNC_FORCED flag is tested here.
400 		 */
401 		if ((mtm->mtm_activesync == 0) &&
402 		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
403 			/*
404 			 * Set flag to stop multiple forced empty
405 			 * sync transactions. Increment mtm_taskq_sync_count.
406 			 */
407 			mtm->mtm_closed |= TOP_SYNC_FORCED;
408 			mtm->mtm_taskq_sync_count++;
409 			mutex_exit(&mtm->mtm_lock);
410 			(void) taskq_dispatch(system_taskq,
411 			    top_issue_sync, ufsvfsp, TQ_SLEEP);
412 			if (tryasync) {
413 				tryfail_cnt++;
414 				return (EWOULDBLOCK);
415 			}
416 			mutex_enter(&mtm->mtm_lock);
417 			goto retry;
418 		}
419 		if (tryasync) {
420 			mutex_exit(&mtm->mtm_lock);
421 			tryfail_cnt++;
422 			return (EWOULDBLOCK);
423 		}
424 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
425 		goto retry;
426 	}
427 	/*
428 	 * we are in the current transaction
429 	 */
430 	mtm->mtm_active++;
431 	ul->un_resv += size;
432 
433 	ASSERT(mtm->mtm_active > 0);
434 	mutex_exit(&mtm->mtm_lock);
435 
436 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
437 	    top_begin_debug(ul, topid, size));
438 	return (0);
439 }
440 
441 /*ARGSUSED*/
442 void
443 top_end_sync(ufsvfs_t *ufsvfsp, int *ep, top_t topid, ulong_t size)
444 {
445 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
446 	mt_map_t	*mtm	= ul->un_logmap;
447 	mapentry_t	*cancellist;
448 	uint32_t	tid;
449 
450 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
451 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
452 	    top_end_debug(ul, mtm, topid, size));
453 
454 	mutex_enter(&mtm->mtm_lock);
455 	tid = mtm->mtm_tid;
456 
457 	mtm->mtm_activesync--;
458 	mtm->mtm_active--;
459 
460 	mtm->mtm_ref = 1;
461 
462 	/*
463 	 * wait for last syncop to complete
464 	 */
465 	if (mtm->mtm_activesync || panicstr) {
466 		ushort_t seq = mtm->mtm_seq;
467 
468 		mtm->mtm_closed = TOP_SYNC;
469 
470 		do {
471 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
472 		} while (seq == mtm->mtm_seq);
473 		mutex_exit(&mtm->mtm_lock);
474 		goto out;
475 	}
476 	/*
477 	 * last syncop; close current transaction to all ops
478 	 */
479 	mtm->mtm_closed = TOP_SYNC|TOP_ASYNC;
480 
481 	/*
482 	 * wait for last asyncop to finish
483 	 */
484 	while (mtm->mtm_active) {
485 		cv_wait(&mtm->mtm_cv_eot, &mtm->mtm_lock);
486 	}
487 
488 	/*
489 	 * push dirty metadata thru the log
490 	 */
491 	deltamap_push(ul);
492 
493 	ASSERT(((ul->un_debug & MT_FORCEROLL) == 0) ||
494 	    top_roll_debug(ul));
495 
496 	mtm->mtm_tid = tid + 1;	/* can overflow to 0 */
497 
498 	/*
499 	 * Empty the cancellist, but save it for logmap_free_cancel
500 	 */
501 	mutex_enter(&mtm->mtm_mutex);
502 	cancellist = mtm->mtm_cancel;
503 	mtm->mtm_cancel = NULL;
504 	mutex_exit(&mtm->mtm_mutex);
505 
506 	/*
507 	 * allow async ops
508 	 */
509 	ASSERT(mtm->mtm_active == 0);
510 	ul->un_resv = 0; /* unreserve the log space */
511 	mtm->mtm_closed = TOP_SYNC;
512 	/*
513 	 * Hold the un_log_mutex here until we are done writing
514 	 * the commit record to prevent any more deltas to be written
515 	 * to the log after we allow async operations.
516 	 */
517 	mutex_enter(&ul->un_log_mutex);
518 	mutex_exit(&mtm->mtm_lock);
519 	cv_broadcast(&mtm->mtm_cv_next);
520 
521 	/*
522 	 * asynchronously write the commit record,
523 	 */
524 	logmap_commit(ul, tid);
525 
526 	/*
527 	 * wait for outstanding log writes (e.g., commits) to finish
528 	 */
529 	ldl_waito(ul);
530 
531 	/*
532 	 * Now that we are sure the commit has been written to the log
533 	 * we can free any canceled deltas.  If we free them before
534 	 * guaranteeing that the commit was written, we could panic before
535 	 * the commit, but after an async thread has allocated and written
536 	 * to canceled freed block.
537 	 */
538 
539 	logmap_free_cancel(mtm, &cancellist);
540 	mutex_exit(&ul->un_log_mutex);
541 
542 	/*
543 	 * now, allow all ops
544 	 */
545 	mutex_enter(&mtm->mtm_lock);
546 	mtm->mtm_active += mtm->mtm_wantin;
547 	ul->un_resv += ul->un_resv_wantin;
548 	mtm->mtm_activesync = mtm->mtm_wantin;
549 	mtm->mtm_wantin = 0;
550 	mtm->mtm_closed = 0;
551 	ul->un_resv_wantin = 0;
552 	mtm->mtm_committid = mtm->mtm_tid;
553 	mtm->mtm_seq++;
554 	mutex_exit(&mtm->mtm_lock);
555 
556 	/*
557 	 * Finish any other synchronous transactions and
558 	 * start any waiting new synchronous transactions
559 	 */
560 	cv_broadcast(&mtm->mtm_cv_commit);
561 
562 	/*
563 	 * if the logmap is getting full; roll something
564 	 */
565 	if (logmap_need_roll_sync(mtm)) {
566 		logmap_forceroll_nowait(mtm);
567 	}
568 
569 out:
570 	if (ul->un_flags & LDL_ERROR)
571 		*ep = EIO;
572 }
573 
574 /*ARGSUSED*/
575 void
576 top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
577 {
578 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
579 	mt_map_t	*mtm	= ul->un_logmap;
580 	threadtrans_t	*tp	= tsd_get(topkey);
581 	int		wakeup_needed = 0;
582 
583 	ASSERT(tp);
584 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
585 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
586 	    top_end_debug(ul, mtm, topid, size));
587 
588 	mutex_enter(&mtm->mtm_lock);
589 
590 	if (size > tp->deltas_size) {
591 		ul->un_resv -= (size - tp->deltas_size);
592 	}
593 	if (tp->any_deltas) {
594 		tp->last_async_tid = mtm->mtm_tid;
595 	}
596 	mtm->mtm_ref = 1;
597 
598 	mtm->mtm_active--;
599 	if ((mtm->mtm_active == 0) &&
600 	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
601 		wakeup_needed = 1;
602 	}
603 	mutex_exit(&mtm->mtm_lock);
604 	if (wakeup_needed)
605 		cv_signal(&mtm->mtm_cv_eot);
606 
607 	/*
608 	 * Generate a sync op if the log, logmap, or deltamap are heavily used.
609 	 * Unless we are possibly holding any VM locks, since if we are holding
610 	 * any VM locks and we issue a top_end_sync(), we could deadlock.
611 	 */
612 	if ((mtm->mtm_activesync == 0) &&
613 	    !(mtm->mtm_closed & TOP_SYNC) &&
614 	    (deltamap_need_commit(ul->un_deltamap) ||
615 	    logmap_need_commit(mtm) ||
616 	    ldl_need_commit(ul)) &&
617 	    (topid != TOP_GETPAGE)) {
618 		top_issue_sync(ufsvfsp);
619 	}
620 	/*
621 	 * roll something from the log if the logmap is too full
622 	 */
623 	if (logmap_need_roll_async(mtm))
624 		logmap_forceroll_nowait(mtm);
625 }
626 
627 /*
628  * Called from roll thread;
629  *	buffer set for reading master
630  * Returns
631  *	0 - success, can continue with next buffer
632  *	1 - failure due to logmap deltas being in use
633  */
634 int
635 top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
636 {
637 	buf_t		*bp	= &rbp->rb_bh;
638 	offset_t	mof	= ldbtob(bp->b_blkno);
639 
640 	/*
641 	 * get a list of deltas
642 	 */
643 	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
644 		/* logmap deltas are in use */
645 		return (1);
646 	}
647 
648 	/*
649 	 * no deltas were found, nothing to roll
650 	 */
651 	if (rbp->rb_age == NULL) {
652 		bp->b_flags |= B_INVAL;
653 		return (0);
654 	}
655 
656 	/*
657 	 * If there is one cached roll buffer that cover all the deltas then
658 	 * we can use that instead of copying to a separate roll buffer.
659 	 */
660 	if (rbp->rb_crb) {
661 		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
662 		return (0);
663 	}
664 
665 	/*
666 	 * Set up the read.
667 	 * If no read is needed logmap_setup_read() returns 0.
668 	 */
669 	if (logmap_setup_read(rbp->rb_age, rbp)) {
670 		/*
671 		 * async read the data from master
672 		 */
673 		logstats.ls_rreads.value.ui64++;
674 		bp->b_bcount = MAPBLOCKSIZE;
675 		(void) bdev_strategy(bp);
676 		lwp_stat_update(LWP_STAT_INBLK, 1);
677 	} else {
678 		sema_v(&bp->b_io); /* mark read as complete */
679 	}
680 	return (0);
681 }
682 
683 int ufs_crb_enable = 1;
684 
685 /*
686  * move deltas from deltamap into the log
687  */
688 void
689 top_log(ufsvfs_t *ufsvfsp, char *va, offset_t vamof, off_t nb,
690     caddr_t buf, uint32_t bufsz)
691 {
692 	ml_unit_t	*ul = ufsvfsp->vfs_log;
693 	mapentry_t	*me;
694 	offset_t	hmof;
695 	uint32_t	hnb, nb1;
696 
697 	/*
698 	 * needed for the roll thread's heuristic
699 	 */
700 	ul->un_logmap->mtm_ref = 1;
701 
702 	if (buf && ufs_crb_enable) {
703 		ASSERT((bufsz & DEV_BMASK) == 0);
704 		/*
705 		 * Move any deltas to the logmap. Split requests that
706 		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
707 		 */
708 		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
709 		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
710 			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
711 			if (hnb > bufsz)
712 				hnb = bufsz;
713 			me = deltamap_remove(ul->un_deltamap,
714 			    MAX(hmof, vamof), MIN(hnb, nb1));
715 			if (me) {
716 				logmap_add_buf(ul, va, hmof, me, buf, hnb);
717 			}
718 		}
719 	} else {
720 		/*
721 		 * if there are deltas
722 		 */
723 		me = deltamap_remove(ul->un_deltamap, vamof, nb);
724 		if (me) {
725 			/*
726 			 * move to logmap
727 			 */
728 			logmap_add(ul, va, vamof, me);
729 		}
730 	}
731 
732 	ASSERT((ul->un_matamap == NULL) ||
733 	    matamap_within(ul->un_matamap, vamof, nb));
734 }
735 
736 
737 static void
738 top_threadtrans_destroy(void *tp)
739 {
740 	kmem_free(tp, sizeof (threadtrans_t));
741 }
742 
743 void
744 _init_top(void)
745 {
746 	ASSERT(top_init_debug());
747 
748 	/*
749 	 * set up the delta layer
750 	 */
751 	_init_map();
752 
753 	/*
754 	 * Initialise the thread specific data transaction key
755 	 */
756 	tsd_create(&topkey, top_threadtrans_destroy);
757 }
758