xref: /titanic_50/usr/src/uts/common/fs/ufs/lufs_top.c (revision 36fe4a92b52649b0979d6a13212f4cea730d19c7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/systm.h>
30 #include <sys/types.h>
31 #include <sys/vnode.h>
32 #include <sys/errno.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/proc.h>
38 #include <sys/taskq.h>
39 #include <sys/cmn_err.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/fs/ufs_filio.h>
42 #include <sys/fs/ufs_log.h>
43 #include <sys/fs/ufs_bio.h>
44 
45 /*
46  * FILE SYSTEM INTERFACE TO TRANSACTION OPERATIONS (TOP; like VOP)
47  */
48 
49 uint_t topkey; /* tsd transaction key */
50 
51 /*
52  * declare a delta
53  */
54 void
55 top_delta(
56 	ufsvfs_t *ufsvfsp,
57 	offset_t mof,
58 	off_t nb,
59 	delta_t dtyp,
60 	int (*func)(),
61 	ulong_t arg)
62 {
63 	ml_unit_t		*ul	= ufsvfsp->vfs_log;
64 	threadtrans_t		*tp	= tsd_get(topkey);
65 
66 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
67 	ASSERT(nb);
68 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
69 		top_delta_debug(ul, mof, nb, dtyp));
70 
71 	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);
72 
73 	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
74 	if (tp) {
75 		tp->any_deltas = 1;
76 	}
77 }
78 
79 /*
80  * cancel a delta
81  */
82 void
83 top_cancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb, int flags)
84 {
85 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
86 	int		metadata = flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA);
87 
88 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
89 	ASSERT(nb);
90 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
91 		(!(flags & metadata) ||
92 		top_delta_debug(ul, mof, nb, DT_CANCEL)));
93 
94 	if (metadata)
95 		deltamap_del(ul->un_deltamap, mof, nb);
96 
97 	logmap_cancel(ul, mof, nb, metadata);
98 
99 	/*
100 	 * needed for the roll thread's heuristic
101 	 */
102 	ul->un_logmap->mtm_ref = 1;
103 }
104 
105 /*
106  * check if this delta has been canceled (metadata -> userdata)
107  */
108 int
109 top_iscancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb)
110 {
111 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
112 
113 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
114 	ASSERT(nb);
115 	if (logmap_iscancel(ul->un_logmap, mof, nb))
116 		return (1);
117 	if (ul->un_flags & LDL_ERROR)
118 		return (1);
119 	return (0);
120 }
121 
122 /*
123  * put device into error state
124  */
125 void
126 top_seterror(ufsvfs_t *ufsvfsp)
127 {
128 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
129 
130 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
131 	ldl_seterror(ul, "ufs is forcing a ufs log error");
132 }
133 
134 /*
135  * issue a empty sync op to help empty the delta/log map or the log
136  */
137 static void
138 top_issue_sync(void *arg)
139 {
140 	ufsvfs_t *ufsvfsp = (ufsvfs_t *)arg;
141 	ml_unit_t *ul = (ml_unit_t *)ufsvfsp->vfs_log;
142 	mt_map_t *mtm = ul->un_logmap;
143 	int	error = 0;
144 
145 	if ((curthread->t_flag & T_DONTBLOCK) == 0)
146 		curthread->t_flag |= T_DONTBLOCK;
147 	top_begin_sync(ufsvfsp, TOP_COMMIT_ASYNC, 0, &error);
148 	if (!error) {
149 		top_end_sync(ufsvfsp, &error, TOP_COMMIT_ASYNC, 0);
150 	}
151 
152 	/*
153 	 * If we are a taskq thread, decrement mtm_taskq_sync_count and
154 	 * wake up the thread waiting on the mtm_cv if the mtm_taskq_sync_count
155 	 * hits zero.
156 	 */
157 
158 	if (taskq_member(system_taskq, curthread)) {
159 		mutex_enter(&mtm->mtm_lock);
160 		mtm->mtm_taskq_sync_count--;
161 		if (mtm->mtm_taskq_sync_count == 0) {
162 			cv_signal(&mtm->mtm_cv);
163 		}
164 		mutex_exit(&mtm->mtm_lock);
165 	}
166 }
167 
168 /*
169  * MOBY TRANSACTION ROUTINES
170  * begin a moby transaction
171  *	sync ops enter until first sync op finishes
172  *	async ops enter until last sync op finishes
173  * end a moby transaction
174  *		outstanding deltas are pushed thru log
175  *		log buffer is committed (incore only)
176  *		next trans is open to async ops
177  *		log buffer is committed on the log
178  *		next trans is open to sync ops
179  */
180 
181 /*ARGSUSED*/
182 void
183 top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
184 {
185 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
186 	mt_map_t	*mtm = ul->un_logmap;
187 	threadtrans_t	*tp;
188 	ushort_t	seq;
189 
190 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
191 	ASSERT(error != NULL);
192 	ASSERT(*error == 0);
193 
194 	mutex_enter(&mtm->mtm_lock);
195 	if (topid == TOP_FSYNC) {
196 		/*
197 		 * Error the fsync immediately if this is an nfs thread
198 		 * and its last transaction has already been committed.
199 		 * The only transactions outstanding are those
200 		 * where no commit has even started
201 		 * (last_async_tid == mtm->mtm_tid)
202 		 * or those where a commit is in progress
203 		 * (last_async_tid == mtm->mtm_committid)
204 		 */
205 		if (curthread->t_flag & T_DONTPEND) {
206 			tp = tsd_get(topkey);
207 			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
208 			    (tp->last_async_tid != mtm->mtm_committid)) {
209 				mutex_exit(&mtm->mtm_lock);
210 				*error = 1;
211 				return;
212 			}
213 		}
214 
215 		/*
216 		 * If there's already other synchronous transactions
217 		 * and we haven't allowed async ones to start yet
218 		 * then just wait for the commit to complete.
219 		 */
220 		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
221 		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
222 			seq = mtm->mtm_seq;
223 			do {
224 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
225 			} while (seq == mtm->mtm_seq);
226 			mutex_exit(&mtm->mtm_lock);
227 			*error = 1;
228 			return;
229 		}
230 		if (mtm->mtm_closed & TOP_SYNC) {
231 			/*
232 			 * We know we're in the window where a thread is
233 			 * committing a transaction in top_end_sync() and
234 			 * has allowed async threads to start but hasn't
235 			 * got the completion on the commit write to
236 			 * allow sync threads to start.
237 			 * So wait for that commit completion then retest
238 			 * for the quick nfs check and if that fails
239 			 * go on to start a transaction
240 			 */
241 			seq = mtm->mtm_seq;
242 			do {
243 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
244 			} while (seq == mtm->mtm_seq);
245 
246 			/* tp is set above if T_DONTPEND */
247 			if ((curthread->t_flag & T_DONTPEND) && tp &&
248 			    (tp->last_async_tid != mtm->mtm_tid) &&
249 			    (tp->last_async_tid != mtm->mtm_committid)) {
250 				mutex_exit(&mtm->mtm_lock);
251 				*error = 1;
252 				return;
253 			}
254 		}
255 	}
256 retry:
257 	mtm->mtm_ref = 1;
258 	/*
259 	 * current transaction closed to sync ops; try for next transaction
260 	 */
261 	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
262 		ulong_t		resv;
263 
264 		/*
265 		 * We know a commit is in progress, if we are trying to
266 		 * commit and we haven't allowed async ones to start yet,
267 		 * then just wait for the commit completion
268 		 */
269 		if ((size == TOP_COMMIT_SIZE) &&
270 		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
271 		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
272 			seq = mtm->mtm_seq;
273 			do {
274 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
275 			} while (seq == mtm->mtm_seq);
276 			mutex_exit(&mtm->mtm_lock);
277 			*error = 1;
278 			return;
279 		}
280 
281 		/*
282 		 * next transaction is full; try for next transaction
283 		 */
284 		resv = size + ul->un_resv_wantin + ul->un_resv;
285 		if (resv > ul->un_maxresv) {
286 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
287 			goto retry;
288 		}
289 		/*
290 		 * we are in the next transaction; wait for it to start
291 		 */
292 		mtm->mtm_wantin++;
293 		ul->un_resv_wantin += size;
294 		/*
295 		 * The corresponding cv_broadcast wakes up
296 		 * all threads that have been validated to go into
297 		 * the next transaction. However, because spurious
298 		 * cv_wait wakeups are possible we use a sequence
299 		 * number to check that the commit and cv_broadcast
300 		 * has really occurred. We couldn't use mtm_tid
301 		 * because on error that doesn't get incremented.
302 		 */
303 		seq = mtm->mtm_seq;
304 		do {
305 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
306 		} while (seq == mtm->mtm_seq);
307 	} else {
308 		/*
309 		 * if the current transaction is full; try the next one
310 		 */
311 		if (size && (ul->un_resv && ((size + ul->un_resv) >
312 		    ul->un_maxresv)) && !panicstr) {
313 			/*
314 			 * log is over reserved and no one will unresv the space
315 			 *	so generate empty sync op to unresv the space
316 			 */
317 			if (mtm->mtm_activesync == 0) {
318 				mutex_exit(&mtm->mtm_lock);
319 				top_issue_sync(ufsvfsp);
320 				mutex_enter(&mtm->mtm_lock);
321 				goto retry;
322 			}
323 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
324 			goto retry;
325 		}
326 		/*
327 		 * we are in the current transaction
328 		 */
329 		mtm->mtm_active++;
330 		mtm->mtm_activesync++;
331 		ul->un_resv += size;
332 	}
333 
334 	ASSERT(mtm->mtm_active > 0);
335 	ASSERT(mtm->mtm_activesync > 0);
336 	mutex_exit(&mtm->mtm_lock);
337 
338 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
339 		top_begin_debug(ul, topid, size));
340 }
341 
342 int tryfail_cnt;
343 
344 int
345 top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
346 {
347 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
348 	mt_map_t	*mtm	= ul->un_logmap;
349 	threadtrans_t   *tp;
350 
351 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
352 
353 	tp = tsd_get(topkey);
354 	if (tp == NULL) {
355 		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
356 		(void) tsd_set(topkey, tp);
357 	}
358 	tp->deltas_size = 0;
359 	tp->any_deltas = 0;
360 
361 	mutex_enter(&mtm->mtm_lock);
362 retry:
363 	mtm->mtm_ref = 1;
364 	/*
365 	 * current transaction closed to async ops; try for next transaction
366 	 */
367 	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
368 		if (tryasync) {
369 			mutex_exit(&mtm->mtm_lock);
370 			tryfail_cnt++;
371 			return (EWOULDBLOCK);
372 		}
373 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
374 		goto retry;
375 	}
376 
377 	/*
378 	 * if the current transaction is full; try the next one
379 	 */
380 	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
381 	    !panicstr) {
382 		/*
383 		 * log is overreserved and no one will unresv the space
384 		 *	so generate empty sync op to unresv the space
385 		 * We need TOP_SYNC_FORCED because we want to know when
386 		 * a top_end_sync is completed.
387 		 * mtm_taskq_sync_count is needed because we want to keep track
388 		 * of the pending top_issue_sync dispatches so that during
389 		 * forced umount we can wait for these to complete.
390 		 * mtm_taskq_sync_count is decremented in top_issue_sync and
391 		 * can remain set even after top_end_sync completes.
392 		 * We have a window between the clearing of TOP_SYNC_FORCED
393 		 * flag and the decrementing of mtm_taskq_sync_count.
394 		 * If in this window new async transactions start consuming
395 		 * log space, the log can get overreserved.
396 		 * Subsequently a new async transaction would fail to generate
397 		 * an empty sync transaction via the taskq, since it finds
398 		 * the mtm_taskq_sync_count set. This can cause a hang.
399 		 * Hence we do not test for mtm_taskq_sync_count being zero.
400 		 * Instead, the TOP_SYNC_FORCED flag is tested here.
401 		 */
402 		if ((mtm->mtm_activesync == 0) &&
403 		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
404 			/*
405 			 * Set flag to stop multiple forced empty
406 			 * sync transactions. Increment mtm_taskq_sync_count.
407 			 */
408 			mtm->mtm_closed |= TOP_SYNC_FORCED;
409 			mtm->mtm_taskq_sync_count++;
410 			mutex_exit(&mtm->mtm_lock);
411 			(void) taskq_dispatch(system_taskq,
412 			    top_issue_sync, ufsvfsp, TQ_SLEEP);
413 			if (tryasync) {
414 				tryfail_cnt++;
415 				return (EWOULDBLOCK);
416 			}
417 			mutex_enter(&mtm->mtm_lock);
418 			goto retry;
419 		}
420 		if (tryasync) {
421 			mutex_exit(&mtm->mtm_lock);
422 			tryfail_cnt++;
423 			return (EWOULDBLOCK);
424 		}
425 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
426 		goto retry;
427 	}
428 	/*
429 	 * we are in the current transaction
430 	 */
431 	mtm->mtm_active++;
432 	ul->un_resv += size;
433 
434 	ASSERT(mtm->mtm_active > 0);
435 	mutex_exit(&mtm->mtm_lock);
436 
437 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
438 		top_begin_debug(ul, topid, size));
439 	return (0);
440 }
441 
442 /*ARGSUSED*/
443 void
444 top_end_sync(ufsvfs_t *ufsvfsp, int *ep, top_t topid, ulong_t size)
445 {
446 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
447 	mt_map_t	*mtm	= ul->un_logmap;
448 	mapentry_t	*cancellist;
449 	uint32_t	tid;
450 
451 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
452 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
453 		top_end_debug(ul, mtm, topid, size));
454 
455 	mutex_enter(&mtm->mtm_lock);
456 	tid = mtm->mtm_tid;
457 
458 	mtm->mtm_activesync--;
459 	mtm->mtm_active--;
460 
461 	mtm->mtm_ref = 1;
462 
463 	/*
464 	 * wait for last syncop to complete
465 	 */
466 	if (mtm->mtm_activesync || panicstr) {
467 		ushort_t seq = mtm->mtm_seq;
468 
469 		mtm->mtm_closed = TOP_SYNC;
470 
471 		do {
472 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
473 		} while (seq == mtm->mtm_seq);
474 		mutex_exit(&mtm->mtm_lock);
475 		goto out;
476 	}
477 	/*
478 	 * last syncop; close current transaction to all ops
479 	 */
480 	mtm->mtm_closed = TOP_SYNC|TOP_ASYNC;
481 
482 	/*
483 	 * wait for last asyncop to finish
484 	 */
485 	while (mtm->mtm_active) {
486 		cv_wait(&mtm->mtm_cv_eot, &mtm->mtm_lock);
487 	}
488 
489 	/*
490 	 * push dirty metadata thru the log
491 	 */
492 	deltamap_push(ul);
493 
494 	ASSERT(((ul->un_debug & MT_FORCEROLL) == 0) ||
495 		top_roll_debug(ul));
496 
497 	mtm->mtm_tid = tid + 1;	/* can overflow to 0 */
498 
499 	/*
500 	 * Empty the cancellist, but save it for logmap_free_cancel
501 	 */
502 	mutex_enter(&mtm->mtm_mutex);
503 	cancellist = mtm->mtm_cancel;
504 	mtm->mtm_cancel = NULL;
505 	mutex_exit(&mtm->mtm_mutex);
506 
507 	/*
508 	 * allow async ops
509 	 */
510 	ASSERT(mtm->mtm_active == 0);
511 	ul->un_resv = 0; /* unreserve the log space */
512 	mtm->mtm_closed = TOP_SYNC;
513 	/*
514 	 * Hold the un_log_mutex here until we are done writing
515 	 * the commit record to prevent any more deltas to be written
516 	 * to the log after we allow async operations.
517 	 */
518 	mutex_enter(&ul->un_log_mutex);
519 	mutex_exit(&mtm->mtm_lock);
520 	cv_broadcast(&mtm->mtm_cv_next);
521 
522 	/*
523 	 * asynchronously write the commit record,
524 	 */
525 	logmap_commit(ul, tid);
526 
527 	/*
528 	 * wait for outstanding log writes (e.g., commits) to finish
529 	 */
530 	ldl_waito(ul);
531 
532 	/*
533 	 * Now that we are sure the commit has been written to the log
534 	 * we can free any canceled deltas.  If we free them before
535 	 * guaranteeing that the commit was written, we could panic before
536 	 * the commit, but after an async thread has allocated and written
537 	 * to canceled freed block.
538 	 */
539 
540 	logmap_free_cancel(mtm, &cancellist);
541 	mutex_exit(&ul->un_log_mutex);
542 
543 	/*
544 	 * now, allow all ops
545 	 */
546 	mutex_enter(&mtm->mtm_lock);
547 	mtm->mtm_active += mtm->mtm_wantin;
548 	ul->un_resv += ul->un_resv_wantin;
549 	mtm->mtm_activesync = mtm->mtm_wantin;
550 	mtm->mtm_wantin = 0;
551 	mtm->mtm_closed = 0;
552 	ul->un_resv_wantin = 0;
553 	mtm->mtm_committid = mtm->mtm_tid;
554 	mtm->mtm_seq++;
555 	mutex_exit(&mtm->mtm_lock);
556 
557 	/*
558 	 * Finish any other synchronous transactions and
559 	 * start any waiting new synchronous transactions
560 	 */
561 	cv_broadcast(&mtm->mtm_cv_commit);
562 
563 	/*
564 	 * if the logmap is getting full; roll something
565 	 */
566 	if (logmap_need_roll_sync(mtm)) {
567 		logmap_forceroll_nowait(mtm);
568 	}
569 
570 out:
571 	if (ul->un_flags & LDL_ERROR)
572 		*ep = EIO;
573 }
574 
575 /*ARGSUSED*/
576 void
577 top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
578 {
579 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
580 	mt_map_t	*mtm	= ul->un_logmap;
581 	threadtrans_t	*tp	= tsd_get(topkey);
582 	int		wakeup_needed = 0;
583 
584 	ASSERT(tp);
585 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
586 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
587 		top_end_debug(ul, mtm, topid, size));
588 
589 	mutex_enter(&mtm->mtm_lock);
590 
591 	if (size > tp->deltas_size) {
592 		ul->un_resv -= (size - tp->deltas_size);
593 	}
594 	if (tp->any_deltas) {
595 		tp->last_async_tid = mtm->mtm_tid;
596 	}
597 	mtm->mtm_ref = 1;
598 
599 	mtm->mtm_active--;
600 	if ((mtm->mtm_active == 0) &&
601 	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
602 		wakeup_needed = 1;
603 	}
604 	mutex_exit(&mtm->mtm_lock);
605 	if (wakeup_needed)
606 		cv_signal(&mtm->mtm_cv_eot);
607 
608 	/*
609 	 * Generate a sync op if the log, logmap, or deltamap are heavily used.
610 	 * Unless we are possibly holding any VM locks, since if we are holding
611 	 * any VM locks and we issue a top_end_sync(), we could deadlock.
612 	 */
613 	if ((mtm->mtm_activesync == 0) &&
614 	    !(mtm->mtm_closed & TOP_SYNC) &&
615 	    (deltamap_need_commit(ul->un_deltamap) ||
616 	    logmap_need_commit(mtm) ||
617 	    ldl_need_commit(ul)) &&
618 	    (topid != TOP_GETPAGE)) {
619 		top_issue_sync(ufsvfsp);
620 	}
621 	/*
622 	 * roll something from the log if the logmap is too full
623 	 */
624 	if (logmap_need_roll_async(mtm))
625 		logmap_forceroll_nowait(mtm);
626 }
627 
628 /*
629  * Called from roll thread;
630  *	buffer set for reading master
631  * Returns
632  *	0 - success, can continue with next buffer
633  *	1 - failure due to logmap deltas being in use
634  */
635 int
636 top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
637 {
638 	buf_t		*bp	= &rbp->rb_bh;
639 	offset_t	mof	= ldbtob(bp->b_blkno);
640 
641 	/*
642 	 * get a list of deltas
643 	 */
644 	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
645 		/* logmap deltas are in use */
646 		return (1);
647 	}
648 
649 	/*
650 	 * no deltas were found, nothing to roll
651 	 */
652 	if (rbp->rb_age == NULL) {
653 		bp->b_flags |= B_INVAL;
654 		return (0);
655 	}
656 
657 	/*
658 	 * If there is one cached roll buffer that cover all the deltas then
659 	 * we can use that instead of copying to a separate roll buffer.
660 	 */
661 	if (rbp->rb_crb) {
662 		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
663 		return (0);
664 	}
665 
666 	/*
667 	 * Set up the read.
668 	 * If no read is needed logmap_setup_read() returns 0.
669 	 */
670 	if (logmap_setup_read(rbp->rb_age, rbp)) {
671 		/*
672 		 * async read the data from master
673 		 */
674 		logstats.ls_rreads.value.ui64++;
675 		bp->b_bcount = MAPBLOCKSIZE;
676 		(void) bdev_strategy(bp);
677 		lwp_stat_update(LWP_STAT_INBLK, 1);
678 	} else {
679 		sema_v(&bp->b_io); /* mark read as complete */
680 	}
681 	return (0);
682 }
683 
684 int ufs_crb_enable = 1;
685 
686 /*
687  * move deltas from deltamap into the log
688  */
689 void
690 top_log(ufsvfs_t *ufsvfsp, char *va, offset_t vamof, off_t nb,
691     caddr_t buf, uint32_t bufsz)
692 {
693 	ml_unit_t	*ul = ufsvfsp->vfs_log;
694 	mapentry_t	*me;
695 	offset_t	hmof;
696 	uint32_t	hnb, nb1;
697 
698 	/*
699 	 * needed for the roll thread's heuristic
700 	 */
701 	ul->un_logmap->mtm_ref = 1;
702 
703 	if (buf && ufs_crb_enable) {
704 		ASSERT((bufsz & DEV_BMASK) == 0);
705 		/*
706 		 * Move any deltas to the logmap. Split requests that
707 		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
708 		 */
709 		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
710 		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
711 			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
712 			if (hnb > bufsz)
713 				hnb = bufsz;
714 			me = deltamap_remove(ul->un_deltamap,
715 			    MAX(hmof, vamof), MIN(hnb, nb1));
716 			if (me) {
717 				logmap_add_buf(ul, va, hmof, me, buf, hnb);
718 			}
719 		}
720 	} else {
721 		/*
722 		 * if there are deltas
723 		 */
724 		me = deltamap_remove(ul->un_deltamap, vamof, nb);
725 		if (me) {
726 			/*
727 			 * move to logmap
728 			 */
729 			logmap_add(ul, va, vamof, me);
730 		}
731 	}
732 
733 	ASSERT((ul->un_matamap == NULL) ||
734 		matamap_within(ul->un_matamap, vamof, nb));
735 }
736 
737 
738 static void
739 top_threadtrans_destroy(void *tp)
740 {
741 	kmem_free(tp, sizeof (threadtrans_t));
742 }
743 
744 void
745 _init_top(void)
746 {
747 	ASSERT(top_init_debug());
748 
749 	/*
750 	 * set up the delta layer
751 	 */
752 	_init_map();
753 
754 	/*
755 	 * Initialise the thread specific data transaction key
756 	 */
757 	tsd_create(&topkey, top_threadtrans_destroy);
758 }
759