xref: /titanic_44/usr/src/uts/common/fs/zfs/space_map.c (revision a2cdcdd260232b58202b11a9bfc0103c9449ed52)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22d6e555bdSGeorge Wilson  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fb09f5aaSMadhav Suresh /*
26bf16b11eSMatthew Ahrens  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
27fb09f5aaSMadhav Suresh  */
28fa9e4066Sahrens 
29fa9e4066Sahrens #include <sys/zfs_context.h>
30fa9e4066Sahrens #include <sys/spa.h>
31fa9e4066Sahrens #include <sys/dmu.h>
320713e232SGeorge Wilson #include <sys/dmu_tx.h>
330713e232SGeorge Wilson #include <sys/dnode.h>
340713e232SGeorge Wilson #include <sys/dsl_pool.h>
35ecc2d604Sbonwick #include <sys/zio.h>
36fa9e4066Sahrens #include <sys/space_map.h>
370713e232SGeorge Wilson #include <sys/refcount.h>
380713e232SGeorge Wilson #include <sys/zfeature.h>
3901f55e48SGeorge Wilson 
/*
 * Block size, in bytes, requested for newly allocated space map objects.
 *
 * Space map data may live on blocks of any size.  Bigger blocks need
 * fewer i/o operations, but they make the DMU hold more data in-core and
 * waste i/o bandwidth when only a handful of blocks changed since the
 * previous transaction group.
 */
int space_map_blksz = 1 << 12;
47fa9e4066Sahrens 
48ecc2d604Sbonwick /*
490713e232SGeorge Wilson  * Load the space map disk into the specified range tree. Segments of maptype
500713e232SGeorge Wilson  * are added to the range tree, other segment types are removed.
510713e232SGeorge Wilson  *
52ecc2d604Sbonwick  * Note: space_map_load() will drop sm_lock across dmu_read() calls.
53ecc2d604Sbonwick  * The caller must be OK with this.
54ecc2d604Sbonwick  */
55fa9e4066Sahrens int
space_map_load(space_map_t * sm,range_tree_t * rt,maptype_t maptype)560713e232SGeorge Wilson space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
57fa9e4066Sahrens {
58fa9e4066Sahrens 	uint64_t *entry, *entry_map, *entry_map_end;
598365e7c9Sbillm 	uint64_t bufsize, size, offset, end, space;
600a4e9518Sgw25295 	int error = 0;
61fa9e4066Sahrens 
62fa9e4066Sahrens 	ASSERT(MUTEX_HELD(sm->sm_lock));
63ecc2d604Sbonwick 
640713e232SGeorge Wilson 	end = space_map_length(sm);
650713e232SGeorge Wilson 	space = space_map_allocated(sm);
66ecc2d604Sbonwick 
670713e232SGeorge Wilson 	VERIFY0(range_tree_space(rt));
68fa9e4066Sahrens 
69fa9e4066Sahrens 	if (maptype == SM_FREE) {
700713e232SGeorge Wilson 		range_tree_add(rt, sm->sm_start, sm->sm_size);
71fa9e4066Sahrens 		space = sm->sm_size - space;
72fa9e4066Sahrens 	}
73fa9e4066Sahrens 
740713e232SGeorge Wilson 	bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
75ecc2d604Sbonwick 	entry_map = zio_buf_alloc(bufsize);
76ecc2d604Sbonwick 
77ecc2d604Sbonwick 	mutex_exit(sm->sm_lock);
780713e232SGeorge Wilson 	if (end > bufsize) {
79*a2cdcdd2SPaul Dagnelie 		dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
80*a2cdcdd2SPaul Dagnelie 		    end - bufsize, ZIO_PRIORITY_SYNC_READ);
810713e232SGeorge Wilson 	}
82ecc2d604Sbonwick 	mutex_enter(sm->sm_lock);
83ecc2d604Sbonwick 
84fa9e4066Sahrens 	for (offset = 0; offset < end; offset += bufsize) {
85fa9e4066Sahrens 		size = MIN(end - offset, bufsize);
86fa9e4066Sahrens 		VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
87fa9e4066Sahrens 		VERIFY(size != 0);
880713e232SGeorge Wilson 		ASSERT3U(sm->sm_blksz, !=, 0);
89fa9e4066Sahrens 
90fa9e4066Sahrens 		dprintf("object=%llu  offset=%llx  size=%llx\n",
910713e232SGeorge Wilson 		    space_map_object(sm), offset, size);
92ecc2d604Sbonwick 
93ecc2d604Sbonwick 		mutex_exit(sm->sm_lock);
940713e232SGeorge Wilson 		error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
950713e232SGeorge Wilson 		    entry_map, DMU_READ_PREFETCH);
96ecc2d604Sbonwick 		mutex_enter(sm->sm_lock);
970a4e9518Sgw25295 		if (error != 0)
98b8493d5dSvl146290 			break;
99fa9e4066Sahrens 
100fa9e4066Sahrens 		entry_map_end = entry_map + (size / sizeof (uint64_t));
101fa9e4066Sahrens 		for (entry = entry_map; entry < entry_map_end; entry++) {
102fa9e4066Sahrens 			uint64_t e = *entry;
1030713e232SGeorge Wilson 			uint64_t offset, size;
104fa9e4066Sahrens 
105fa9e4066Sahrens 			if (SM_DEBUG_DECODE(e))		/* Skip debug entries */
106fa9e4066Sahrens 				continue;
107fa9e4066Sahrens 
1080713e232SGeorge Wilson 			offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
1090713e232SGeorge Wilson 			    sm->sm_start;
1100713e232SGeorge Wilson 			size = SM_RUN_DECODE(e) << sm->sm_shift;
111b8493d5dSvl146290 
1120713e232SGeorge Wilson 			VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
1130713e232SGeorge Wilson 			VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift));
1140713e232SGeorge Wilson 			VERIFY3U(offset, >=, sm->sm_start);
1150713e232SGeorge Wilson 			VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
1160713e232SGeorge Wilson 			if (SM_TYPE_DECODE(e) == maptype) {
1170713e232SGeorge Wilson 				VERIFY3U(range_tree_space(rt) + size, <=,
1180713e232SGeorge Wilson 				    sm->sm_size);
1190713e232SGeorge Wilson 				range_tree_add(rt, offset, size);
120b8493d5dSvl146290 			} else {
1210713e232SGeorge Wilson 				range_tree_remove(rt, offset, size);
122b8493d5dSvl146290 			}
1230713e232SGeorge Wilson 		}
1240713e232SGeorge Wilson 	}
1250713e232SGeorge Wilson 
1260713e232SGeorge Wilson 	if (error == 0)
1270713e232SGeorge Wilson 		VERIFY3U(range_tree_space(rt), ==, space);
1280713e232SGeorge Wilson 	else
1290713e232SGeorge Wilson 		range_tree_vacate(rt, NULL, NULL);
130b8493d5dSvl146290 
131ecc2d604Sbonwick 	zio_buf_free(entry_map, bufsize);
1320a4e9518Sgw25295 	return (error);
133fa9e4066Sahrens }
134fa9e4066Sahrens 
135fa9e4066Sahrens void
space_map_histogram_clear(space_map_t * sm)1360713e232SGeorge Wilson space_map_histogram_clear(space_map_t *sm)
137ecc2d604Sbonwick {
1380713e232SGeorge Wilson 	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1390713e232SGeorge Wilson 		return;
140ecc2d604Sbonwick 
1410713e232SGeorge Wilson 	bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
142ecc2d604Sbonwick }
143ecc2d604Sbonwick 
1440713e232SGeorge Wilson boolean_t
space_map_histogram_verify(space_map_t * sm,range_tree_t * rt)1450713e232SGeorge Wilson space_map_histogram_verify(space_map_t *sm, range_tree_t *rt)
146d6e555bdSGeorge Wilson {
1470713e232SGeorge Wilson 	/*
1480713e232SGeorge Wilson 	 * Verify that the in-core range tree does not have any
1490713e232SGeorge Wilson 	 * ranges smaller than our sm_shift size.
1500713e232SGeorge Wilson 	 */
1510713e232SGeorge Wilson 	for (int i = 0; i < sm->sm_shift; i++) {
1520713e232SGeorge Wilson 		if (rt->rt_histogram[i] != 0)
1530713e232SGeorge Wilson 			return (B_FALSE);
154d6e555bdSGeorge Wilson 	}
1550713e232SGeorge Wilson 	return (B_TRUE);
156ecc2d604Sbonwick }
157ecc2d604Sbonwick 
158ecc2d604Sbonwick void
space_map_histogram_add(space_map_t * sm,range_tree_t * rt,dmu_tx_t * tx)1590713e232SGeorge Wilson space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
160ecc2d604Sbonwick {
1610713e232SGeorge Wilson 	int idx = 0;
1620713e232SGeorge Wilson 
1630713e232SGeorge Wilson 	ASSERT(MUTEX_HELD(rt->rt_lock));
1640713e232SGeorge Wilson 	ASSERT(dmu_tx_is_syncing(tx));
1650713e232SGeorge Wilson 	VERIFY3U(space_map_object(sm), !=, 0);
1660713e232SGeorge Wilson 
1670713e232SGeorge Wilson 	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1680713e232SGeorge Wilson 		return;
1690713e232SGeorge Wilson 
1700713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
1710713e232SGeorge Wilson 
1720713e232SGeorge Wilson 	ASSERT(space_map_histogram_verify(sm, rt));
1730713e232SGeorge Wilson 
1740713e232SGeorge Wilson 	/*
1750713e232SGeorge Wilson 	 * Transfer the content of the range tree histogram to the space
1760713e232SGeorge Wilson 	 * map histogram. The space map histogram contains 32 buckets ranging
1770713e232SGeorge Wilson 	 * between 2^sm_shift to 2^(32+sm_shift-1). The range tree,
1780713e232SGeorge Wilson 	 * however, can represent ranges from 2^0 to 2^63. Since the space
1790713e232SGeorge Wilson 	 * map only cares about allocatable blocks (minimum of sm_shift) we
1800713e232SGeorge Wilson 	 * can safely ignore all ranges in the range tree smaller than sm_shift.
1810713e232SGeorge Wilson 	 */
1820713e232SGeorge Wilson 	for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
1830713e232SGeorge Wilson 
1840713e232SGeorge Wilson 		/*
1850713e232SGeorge Wilson 		 * Since the largest histogram bucket in the space map is
1860713e232SGeorge Wilson 		 * 2^(32+sm_shift-1), we need to normalize the values in
1870713e232SGeorge Wilson 		 * the range tree for any bucket larger than that size. For
1880713e232SGeorge Wilson 		 * example given an sm_shift of 9, ranges larger than 2^40
1890713e232SGeorge Wilson 		 * would get normalized as if they were 1TB ranges. Assume
1900713e232SGeorge Wilson 		 * the range tree had a count of 5 in the 2^44 (16TB) bucket,
1910713e232SGeorge Wilson 		 * the calculation below would normalize this to 5 * 2^4 (16).
1920713e232SGeorge Wilson 		 */
1930713e232SGeorge Wilson 		ASSERT3U(i, >=, idx + sm->sm_shift);
1940713e232SGeorge Wilson 		sm->sm_phys->smp_histogram[idx] +=
1950713e232SGeorge Wilson 		    rt->rt_histogram[i] << (i - idx - sm->sm_shift);
1960713e232SGeorge Wilson 
1970713e232SGeorge Wilson 		/*
1980713e232SGeorge Wilson 		 * Increment the space map's index as long as we haven't
1990713e232SGeorge Wilson 		 * reached the maximum bucket size. Accumulate all ranges
2000713e232SGeorge Wilson 		 * larger than the max bucket size into the last bucket.
2010713e232SGeorge Wilson 		 */
2022e4c9986SGeorge Wilson 		if (idx < SPACE_MAP_HISTOGRAM_SIZE - 1) {
2030713e232SGeorge Wilson 			ASSERT3U(idx + sm->sm_shift, ==, i);
2040713e232SGeorge Wilson 			idx++;
2052e4c9986SGeorge Wilson 			ASSERT3U(idx, <, SPACE_MAP_HISTOGRAM_SIZE);
2060713e232SGeorge Wilson 		}
2070713e232SGeorge Wilson 	}
2080713e232SGeorge Wilson }
2090713e232SGeorge Wilson 
2100713e232SGeorge Wilson uint64_t
space_map_entries(space_map_t * sm,range_tree_t * rt)2110713e232SGeorge Wilson space_map_entries(space_map_t *sm, range_tree_t *rt)
2120713e232SGeorge Wilson {
2130713e232SGeorge Wilson 	avl_tree_t *t = &rt->rt_root;
2140713e232SGeorge Wilson 	range_seg_t *rs;
2150713e232SGeorge Wilson 	uint64_t size, entries;
2160713e232SGeorge Wilson 
2170713e232SGeorge Wilson 	/*
2180713e232SGeorge Wilson 	 * All space_maps always have a debug entry so account for it here.
2190713e232SGeorge Wilson 	 */
2200713e232SGeorge Wilson 	entries = 1;
2210713e232SGeorge Wilson 
2220713e232SGeorge Wilson 	/*
2230713e232SGeorge Wilson 	 * Traverse the range tree and calculate the number of space map
2240713e232SGeorge Wilson 	 * entries that would be required to write out the range tree.
2250713e232SGeorge Wilson 	 */
2260713e232SGeorge Wilson 	for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
2270713e232SGeorge Wilson 		size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
2280713e232SGeorge Wilson 		entries += howmany(size, SM_RUN_MAX);
2290713e232SGeorge Wilson 	}
2300713e232SGeorge Wilson 	return (entries);
231ecc2d604Sbonwick }
232ecc2d604Sbonwick 
233ecc2d604Sbonwick /*
2340713e232SGeorge Wilson  * Note: space_map_write() will drop sm_lock across dmu_write() calls.
235ecc2d604Sbonwick  */
236ecc2d604Sbonwick void
space_map_write(space_map_t * sm,range_tree_t * rt,maptype_t maptype,dmu_tx_t * tx)2370713e232SGeorge Wilson space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
2380713e232SGeorge Wilson     dmu_tx_t *tx)
239fa9e4066Sahrens {
2400713e232SGeorge Wilson 	objset_t *os = sm->sm_os;
241fa9e4066Sahrens 	spa_t *spa = dmu_objset_spa(os);
2420713e232SGeorge Wilson 	avl_tree_t *t = &rt->rt_root;
2430713e232SGeorge Wilson 	range_seg_t *rs;
2440713e232SGeorge Wilson 	uint64_t size, total, rt_space, nodes;
245fa9e4066Sahrens 	uint64_t *entry, *entry_map, *entry_map_end;
246b1be2892SMatthew Ahrens 	uint64_t expected_entries, actual_entries = 1;
247fa9e4066Sahrens 
2480713e232SGeorge Wilson 	ASSERT(MUTEX_HELD(rt->rt_lock));
2490713e232SGeorge Wilson 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
2500713e232SGeorge Wilson 	VERIFY3U(space_map_object(sm), !=, 0);
2510713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
252fa9e4066Sahrens 
2530713e232SGeorge Wilson 	/*
2540713e232SGeorge Wilson 	 * This field is no longer necessary since the in-core space map
2550713e232SGeorge Wilson 	 * now contains the object number but is maintained for backwards
2560713e232SGeorge Wilson 	 * compatibility.
2570713e232SGeorge Wilson 	 */
2580713e232SGeorge Wilson 	sm->sm_phys->smp_object = sm->sm_object;
2590713e232SGeorge Wilson 
2600713e232SGeorge Wilson 	if (range_tree_space(rt) == 0) {
2610713e232SGeorge Wilson 		VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object);
262fa9e4066Sahrens 		return;
2630713e232SGeorge Wilson 	}
264fa9e4066Sahrens 
265ecc2d604Sbonwick 	if (maptype == SM_ALLOC)
2660713e232SGeorge Wilson 		sm->sm_phys->smp_alloc += range_tree_space(rt);
267ecc2d604Sbonwick 	else
2680713e232SGeorge Wilson 		sm->sm_phys->smp_alloc -= range_tree_space(rt);
269ecc2d604Sbonwick 
2700713e232SGeorge Wilson 	expected_entries = space_map_entries(sm, rt);
2710713e232SGeorge Wilson 
2720713e232SGeorge Wilson 	entry_map = zio_buf_alloc(sm->sm_blksz);
2730713e232SGeorge Wilson 	entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
274fa9e4066Sahrens 	entry = entry_map;
275fa9e4066Sahrens 
276fa9e4066Sahrens 	*entry++ = SM_DEBUG_ENCODE(1) |
277fa9e4066Sahrens 	    SM_DEBUG_ACTION_ENCODE(maptype) |
278fa9e4066Sahrens 	    SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
279fa9e4066Sahrens 	    SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
280fa9e4066Sahrens 
28116a4a807SGeorge Wilson 	total = 0;
2820713e232SGeorge Wilson 	nodes = avl_numnodes(&rt->rt_root);
2830713e232SGeorge Wilson 	rt_space = range_tree_space(rt);
2840713e232SGeorge Wilson 	for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
2850713e232SGeorge Wilson 		uint64_t start;
286fa9e4066Sahrens 
2870713e232SGeorge Wilson 		size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
2880713e232SGeorge Wilson 		start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
289fa9e4066Sahrens 
2900713e232SGeorge Wilson 		total += size << sm->sm_shift;
2910713e232SGeorge Wilson 
2920713e232SGeorge Wilson 		while (size != 0) {
2930713e232SGeorge Wilson 			uint64_t run_len;
2940713e232SGeorge Wilson 
295fa9e4066Sahrens 			run_len = MIN(size, SM_RUN_MAX);
296fa9e4066Sahrens 
297fa9e4066Sahrens 			if (entry == entry_map_end) {
2980713e232SGeorge Wilson 				mutex_exit(rt->rt_lock);
2990713e232SGeorge Wilson 				dmu_write(os, space_map_object(sm),
3000713e232SGeorge Wilson 				    sm->sm_phys->smp_objsize, sm->sm_blksz,
3010713e232SGeorge Wilson 				    entry_map, tx);
3020713e232SGeorge Wilson 				mutex_enter(rt->rt_lock);
3030713e232SGeorge Wilson 				sm->sm_phys->smp_objsize += sm->sm_blksz;
304fa9e4066Sahrens 				entry = entry_map;
305fa9e4066Sahrens 			}
306fa9e4066Sahrens 
307fa9e4066Sahrens 			*entry++ = SM_OFFSET_ENCODE(start) |
308fa9e4066Sahrens 			    SM_TYPE_ENCODE(maptype) |
309fa9e4066Sahrens 			    SM_RUN_ENCODE(run_len);
310fa9e4066Sahrens 
311fa9e4066Sahrens 			start += run_len;
312fa9e4066Sahrens 			size -= run_len;
3130713e232SGeorge Wilson 			actual_entries++;
314fa9e4066Sahrens 		}
315fa9e4066Sahrens 	}
316fa9e4066Sahrens 
317fa9e4066Sahrens 	if (entry != entry_map) {
318fa9e4066Sahrens 		size = (entry - entry_map) * sizeof (uint64_t);
3190713e232SGeorge Wilson 		mutex_exit(rt->rt_lock);
3200713e232SGeorge Wilson 		dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
321fa9e4066Sahrens 		    size, entry_map, tx);
3220713e232SGeorge Wilson 		mutex_enter(rt->rt_lock);
3230713e232SGeorge Wilson 		sm->sm_phys->smp_objsize += size;
324fa9e4066Sahrens 	}
3250713e232SGeorge Wilson 	ASSERT3U(expected_entries, ==, actual_entries);
326fa9e4066Sahrens 
32701f55e48SGeorge Wilson 	/*
32801f55e48SGeorge Wilson 	 * Ensure that the space_map's accounting wasn't changed
32901f55e48SGeorge Wilson 	 * while we were in the middle of writing it out.
33001f55e48SGeorge Wilson 	 */
3310713e232SGeorge Wilson 	VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
3320713e232SGeorge Wilson 	VERIFY3U(range_tree_space(rt), ==, rt_space);
3330713e232SGeorge Wilson 	VERIFY3U(range_tree_space(rt), ==, total);
33401f55e48SGeorge Wilson 
3350713e232SGeorge Wilson 	zio_buf_free(entry_map, sm->sm_blksz);
336fa9e4066Sahrens }
337fa9e4066Sahrens 
3388ad4d6ddSJeff Bonwick static int
space_map_open_impl(space_map_t * sm)3390713e232SGeorge Wilson space_map_open_impl(space_map_t *sm)
3408ad4d6ddSJeff Bonwick {
3410713e232SGeorge Wilson 	int error;
3420713e232SGeorge Wilson 	u_longlong_t blocks;
3438ad4d6ddSJeff Bonwick 
3440713e232SGeorge Wilson 	error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf);
3450713e232SGeorge Wilson 	if (error)
3460713e232SGeorge Wilson 		return (error);
3478ad4d6ddSJeff Bonwick 
3480713e232SGeorge Wilson 	dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
3490713e232SGeorge Wilson 	sm->sm_phys = sm->sm_dbuf->db_data;
3500713e232SGeorge Wilson 	return (0);
3510713e232SGeorge Wilson }
3520713e232SGeorge Wilson 
3530713e232SGeorge Wilson int
space_map_open(space_map_t ** smp,objset_t * os,uint64_t object,uint64_t start,uint64_t size,uint8_t shift,kmutex_t * lp)3540713e232SGeorge Wilson space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
3550713e232SGeorge Wilson     uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp)
3560713e232SGeorge Wilson {
3570713e232SGeorge Wilson 	space_map_t *sm;
3580713e232SGeorge Wilson 	int error;
3590713e232SGeorge Wilson 
3600713e232SGeorge Wilson 	ASSERT(*smp == NULL);
3610713e232SGeorge Wilson 	ASSERT(os != NULL);
3620713e232SGeorge Wilson 	ASSERT(object != 0);
3630713e232SGeorge Wilson 
3640713e232SGeorge Wilson 	sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
3650713e232SGeorge Wilson 
3660713e232SGeorge Wilson 	sm->sm_start = start;
3670713e232SGeorge Wilson 	sm->sm_size = size;
3680713e232SGeorge Wilson 	sm->sm_shift = shift;
3690713e232SGeorge Wilson 	sm->sm_lock = lp;
3700713e232SGeorge Wilson 	sm->sm_os = os;
3710713e232SGeorge Wilson 	sm->sm_object = object;
3720713e232SGeorge Wilson 
3730713e232SGeorge Wilson 	error = space_map_open_impl(sm);
3740713e232SGeorge Wilson 	if (error != 0) {
3750713e232SGeorge Wilson 		space_map_close(sm);
3760713e232SGeorge Wilson 		return (error);
3770713e232SGeorge Wilson 	}
3780713e232SGeorge Wilson 
3790713e232SGeorge Wilson 	*smp = sm;
3808ad4d6ddSJeff Bonwick 
3818ad4d6ddSJeff Bonwick 	return (0);
3828ad4d6ddSJeff Bonwick }
3838ad4d6ddSJeff Bonwick 
3848ad4d6ddSJeff Bonwick void
space_map_close(space_map_t * sm)3850713e232SGeorge Wilson space_map_close(space_map_t *sm)
3868ad4d6ddSJeff Bonwick {
3870713e232SGeorge Wilson 	if (sm == NULL)
3880713e232SGeorge Wilson 		return;
3898ad4d6ddSJeff Bonwick 
3900713e232SGeorge Wilson 	if (sm->sm_dbuf != NULL)
3910713e232SGeorge Wilson 		dmu_buf_rele(sm->sm_dbuf, sm);
3920713e232SGeorge Wilson 	sm->sm_dbuf = NULL;
3930713e232SGeorge Wilson 	sm->sm_phys = NULL;
3948ad4d6ddSJeff Bonwick 
3950713e232SGeorge Wilson 	kmem_free(sm, sizeof (*sm));
3968ad4d6ddSJeff Bonwick }
3978ad4d6ddSJeff Bonwick 
3988ad4d6ddSJeff Bonwick void
space_map_truncate(space_map_t * sm,dmu_tx_t * tx)3990713e232SGeorge Wilson space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
4008ad4d6ddSJeff Bonwick {
4010713e232SGeorge Wilson 	objset_t *os = sm->sm_os;
4020713e232SGeorge Wilson 	spa_t *spa = dmu_objset_spa(os);
4030713e232SGeorge Wilson 	dmu_object_info_t doi;
4048ad4d6ddSJeff Bonwick 
4050713e232SGeorge Wilson 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
4060713e232SGeorge Wilson 	ASSERT(dmu_tx_is_syncing(tx));
4078ad4d6ddSJeff Bonwick 
4080713e232SGeorge Wilson 	dmu_object_info_from_db(sm->sm_dbuf, &doi);
4098ad4d6ddSJeff Bonwick 
410b1be2892SMatthew Ahrens 	/*
411b1be2892SMatthew Ahrens 	 * If the space map has the wrong bonus size (because
412b1be2892SMatthew Ahrens 	 * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
413b1be2892SMatthew Ahrens 	 * the wrong block size (because space_map_blksz has changed),
414b1be2892SMatthew Ahrens 	 * free and re-allocate its object with the updated sizes.
415b1be2892SMatthew Ahrens 	 *
416b1be2892SMatthew Ahrens 	 * Otherwise, just truncate the current object.
417b1be2892SMatthew Ahrens 	 */
418b1be2892SMatthew Ahrens 	if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
419b1be2892SMatthew Ahrens 	    doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
420b1be2892SMatthew Ahrens 	    doi.doi_data_block_size != space_map_blksz) {
4210713e232SGeorge Wilson 		zfs_dbgmsg("txg %llu, spa %s, reallocating: "
4220713e232SGeorge Wilson 		    "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
4230713e232SGeorge Wilson 		    spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
424b1be2892SMatthew Ahrens 
425b1be2892SMatthew Ahrens 		space_map_free(sm, tx);
426b1be2892SMatthew Ahrens 		dmu_buf_rele(sm->sm_dbuf, sm);
427b1be2892SMatthew Ahrens 
428b1be2892SMatthew Ahrens 		sm->sm_object = space_map_alloc(sm->sm_os, tx);
429b1be2892SMatthew Ahrens 		VERIFY0(space_map_open_impl(sm));
430b1be2892SMatthew Ahrens 	} else {
431b1be2892SMatthew Ahrens 		VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
432b1be2892SMatthew Ahrens 
433b1be2892SMatthew Ahrens 		/*
434b1be2892SMatthew Ahrens 		 * If the spacemap is reallocated, its histogram
435b1be2892SMatthew Ahrens 		 * will be reset.  Do the same in the common case so that
436b1be2892SMatthew Ahrens 		 * bugs related to the uncommon case do not go unnoticed.
437b1be2892SMatthew Ahrens 		 */
438b1be2892SMatthew Ahrens 		bzero(sm->sm_phys->smp_histogram,
439b1be2892SMatthew Ahrens 		    sizeof (sm->sm_phys->smp_histogram));
4400713e232SGeorge Wilson 	}
4410713e232SGeorge Wilson 
4420713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
4430713e232SGeorge Wilson 	sm->sm_phys->smp_objsize = 0;
4440713e232SGeorge Wilson 	sm->sm_phys->smp_alloc = 0;
4450713e232SGeorge Wilson }
4460713e232SGeorge Wilson 
4470713e232SGeorge Wilson /*
4480713e232SGeorge Wilson  * Update the in-core space_map allocation and length values.
4490713e232SGeorge Wilson  */
4500713e232SGeorge Wilson void
space_map_update(space_map_t * sm)4510713e232SGeorge Wilson space_map_update(space_map_t *sm)
4520713e232SGeorge Wilson {
4530713e232SGeorge Wilson 	if (sm == NULL)
4540713e232SGeorge Wilson 		return;
4550713e232SGeorge Wilson 
4560713e232SGeorge Wilson 	ASSERT(MUTEX_HELD(sm->sm_lock));
4570713e232SGeorge Wilson 
4580713e232SGeorge Wilson 	sm->sm_alloc = sm->sm_phys->smp_alloc;
4590713e232SGeorge Wilson 	sm->sm_length = sm->sm_phys->smp_objsize;
4600713e232SGeorge Wilson }
4610713e232SGeorge Wilson 
4620713e232SGeorge Wilson uint64_t
space_map_alloc(objset_t * os,dmu_tx_t * tx)4630713e232SGeorge Wilson space_map_alloc(objset_t *os, dmu_tx_t *tx)
4640713e232SGeorge Wilson {
4650713e232SGeorge Wilson 	spa_t *spa = dmu_objset_spa(os);
4660713e232SGeorge Wilson 	uint64_t object;
4670713e232SGeorge Wilson 	int bonuslen;
4680713e232SGeorge Wilson 
4692acef22dSMatthew Ahrens 	if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
4702acef22dSMatthew Ahrens 		spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
4710713e232SGeorge Wilson 		bonuslen = sizeof (space_map_phys_t);
4720713e232SGeorge Wilson 		ASSERT3U(bonuslen, <=, dmu_bonus_max());
4730713e232SGeorge Wilson 	} else {
4740713e232SGeorge Wilson 		bonuslen = SPACE_MAP_SIZE_V0;
4750713e232SGeorge Wilson 	}
4760713e232SGeorge Wilson 
4770713e232SGeorge Wilson 	object = dmu_object_alloc(os,
478b1be2892SMatthew Ahrens 	    DMU_OT_SPACE_MAP, space_map_blksz,
4790713e232SGeorge Wilson 	    DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
4800713e232SGeorge Wilson 
4810713e232SGeorge Wilson 	return (object);
4820713e232SGeorge Wilson }
4830713e232SGeorge Wilson 
4840713e232SGeorge Wilson void
space_map_free(space_map_t * sm,dmu_tx_t * tx)4850713e232SGeorge Wilson space_map_free(space_map_t *sm, dmu_tx_t *tx)
4860713e232SGeorge Wilson {
4870713e232SGeorge Wilson 	spa_t *spa;
4880713e232SGeorge Wilson 
4890713e232SGeorge Wilson 	if (sm == NULL)
4900713e232SGeorge Wilson 		return;
4910713e232SGeorge Wilson 
4920713e232SGeorge Wilson 	spa = dmu_objset_spa(sm->sm_os);
4932acef22dSMatthew Ahrens 	if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
4940713e232SGeorge Wilson 		dmu_object_info_t doi;
4950713e232SGeorge Wilson 
4960713e232SGeorge Wilson 		dmu_object_info_from_db(sm->sm_dbuf, &doi);
4970713e232SGeorge Wilson 		if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
4982acef22dSMatthew Ahrens 			VERIFY(spa_feature_is_active(spa,
4992acef22dSMatthew Ahrens 			    SPA_FEATURE_SPACEMAP_HISTOGRAM));
5002acef22dSMatthew Ahrens 			spa_feature_decr(spa,
5012acef22dSMatthew Ahrens 			    SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
5028ad4d6ddSJeff Bonwick 		}
5038ad4d6ddSJeff Bonwick 	}
5040713e232SGeorge Wilson 
5050713e232SGeorge Wilson 	VERIFY3U(dmu_object_free(sm->sm_os, space_map_object(sm), tx), ==, 0);
5060713e232SGeorge Wilson 	sm->sm_object = 0;
5078ad4d6ddSJeff Bonwick }
5080713e232SGeorge Wilson 
5090713e232SGeorge Wilson uint64_t
space_map_object(space_map_t * sm)5100713e232SGeorge Wilson space_map_object(space_map_t *sm)
5110713e232SGeorge Wilson {
5120713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_object : 0);
5130713e232SGeorge Wilson }
5140713e232SGeorge Wilson 
5150713e232SGeorge Wilson /*
5160713e232SGeorge Wilson  * Returns the already synced, on-disk allocated space.
5170713e232SGeorge Wilson  */
5180713e232SGeorge Wilson uint64_t
space_map_allocated(space_map_t * sm)5190713e232SGeorge Wilson space_map_allocated(space_map_t *sm)
5200713e232SGeorge Wilson {
5210713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_alloc : 0);
5220713e232SGeorge Wilson }
5230713e232SGeorge Wilson 
5240713e232SGeorge Wilson /*
5250713e232SGeorge Wilson  * Returns the already synced, on-disk length;
5260713e232SGeorge Wilson  */
5270713e232SGeorge Wilson uint64_t
space_map_length(space_map_t * sm)5280713e232SGeorge Wilson space_map_length(space_map_t *sm)
5290713e232SGeorge Wilson {
5300713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_length : 0);
5310713e232SGeorge Wilson }
5320713e232SGeorge Wilson 
5330713e232SGeorge Wilson /*
5340713e232SGeorge Wilson  * Returns the allocated space that is currently syncing.
5350713e232SGeorge Wilson  */
5360713e232SGeorge Wilson int64_t
space_map_alloc_delta(space_map_t * sm)5370713e232SGeorge Wilson space_map_alloc_delta(space_map_t *sm)
5380713e232SGeorge Wilson {
5390713e232SGeorge Wilson 	if (sm == NULL)
5400713e232SGeorge Wilson 		return (0);
5410713e232SGeorge Wilson 	ASSERT(sm->sm_dbuf != NULL);
5420713e232SGeorge Wilson 	return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
5438ad4d6ddSJeff Bonwick }
544