/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/dsl_pool.h>
#include <sys/zio.h>
#include <sys/space_map.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>

/*
 * The data for a given space map can be kept on blocks of any size.
 * Larger blocks entail fewer i/o operations, but they also cause the
 * DMU to keep more data in-core, and also to waste more i/o bandwidth
 * when only a few blocks have changed since the last transaction group.
 *
 * Defaults to 4K (1 << 12).  This value is the data block size used when
 * a space map object is allocated, and a mismatch against an existing
 * object's block size causes that object to be reallocated on truncate.
 */
int space_map_blksz = (1 << 12);

48ecc2d604Sbonwick /*
490713e232SGeorge Wilson * Load the space map disk into the specified range tree. Segments of maptype
500713e232SGeorge Wilson * are added to the range tree, other segment types are removed.
510713e232SGeorge Wilson *
52ecc2d604Sbonwick * Note: space_map_load() will drop sm_lock across dmu_read() calls.
53ecc2d604Sbonwick * The caller must be OK with this.
54ecc2d604Sbonwick */
55fa9e4066Sahrens int
space_map_load(space_map_t * sm,range_tree_t * rt,maptype_t maptype)560713e232SGeorge Wilson space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
57fa9e4066Sahrens {
58fa9e4066Sahrens uint64_t *entry, *entry_map, *entry_map_end;
598365e7c9Sbillm uint64_t bufsize, size, offset, end, space;
600a4e9518Sgw25295 int error = 0;
61fa9e4066Sahrens
62fa9e4066Sahrens ASSERT(MUTEX_HELD(sm->sm_lock));
63ecc2d604Sbonwick
640713e232SGeorge Wilson end = space_map_length(sm);
650713e232SGeorge Wilson space = space_map_allocated(sm);
66ecc2d604Sbonwick
670713e232SGeorge Wilson VERIFY0(range_tree_space(rt));
68fa9e4066Sahrens
69fa9e4066Sahrens if (maptype == SM_FREE) {
700713e232SGeorge Wilson range_tree_add(rt, sm->sm_start, sm->sm_size);
71fa9e4066Sahrens space = sm->sm_size - space;
72fa9e4066Sahrens }
73fa9e4066Sahrens
740713e232SGeorge Wilson bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
75ecc2d604Sbonwick entry_map = zio_buf_alloc(bufsize);
76ecc2d604Sbonwick
77ecc2d604Sbonwick mutex_exit(sm->sm_lock);
780713e232SGeorge Wilson if (end > bufsize) {
79*a2cdcdd2SPaul Dagnelie dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
80*a2cdcdd2SPaul Dagnelie end - bufsize, ZIO_PRIORITY_SYNC_READ);
810713e232SGeorge Wilson }
82ecc2d604Sbonwick mutex_enter(sm->sm_lock);
83ecc2d604Sbonwick
84fa9e4066Sahrens for (offset = 0; offset < end; offset += bufsize) {
85fa9e4066Sahrens size = MIN(end - offset, bufsize);
86fa9e4066Sahrens VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
87fa9e4066Sahrens VERIFY(size != 0);
880713e232SGeorge Wilson ASSERT3U(sm->sm_blksz, !=, 0);
89fa9e4066Sahrens
90fa9e4066Sahrens dprintf("object=%llu offset=%llx size=%llx\n",
910713e232SGeorge Wilson space_map_object(sm), offset, size);
92ecc2d604Sbonwick
93ecc2d604Sbonwick mutex_exit(sm->sm_lock);
940713e232SGeorge Wilson error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
950713e232SGeorge Wilson entry_map, DMU_READ_PREFETCH);
96ecc2d604Sbonwick mutex_enter(sm->sm_lock);
970a4e9518Sgw25295 if (error != 0)
98b8493d5dSvl146290 break;
99fa9e4066Sahrens
100fa9e4066Sahrens entry_map_end = entry_map + (size / sizeof (uint64_t));
101fa9e4066Sahrens for (entry = entry_map; entry < entry_map_end; entry++) {
102fa9e4066Sahrens uint64_t e = *entry;
1030713e232SGeorge Wilson uint64_t offset, size;
104fa9e4066Sahrens
105fa9e4066Sahrens if (SM_DEBUG_DECODE(e)) /* Skip debug entries */
106fa9e4066Sahrens continue;
107fa9e4066Sahrens
1080713e232SGeorge Wilson offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
1090713e232SGeorge Wilson sm->sm_start;
1100713e232SGeorge Wilson size = SM_RUN_DECODE(e) << sm->sm_shift;
111b8493d5dSvl146290
1120713e232SGeorge Wilson VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
1130713e232SGeorge Wilson VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift));
1140713e232SGeorge Wilson VERIFY3U(offset, >=, sm->sm_start);
1150713e232SGeorge Wilson VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
1160713e232SGeorge Wilson if (SM_TYPE_DECODE(e) == maptype) {
1170713e232SGeorge Wilson VERIFY3U(range_tree_space(rt) + size, <=,
1180713e232SGeorge Wilson sm->sm_size);
1190713e232SGeorge Wilson range_tree_add(rt, offset, size);
120b8493d5dSvl146290 } else {
1210713e232SGeorge Wilson range_tree_remove(rt, offset, size);
122b8493d5dSvl146290 }
1230713e232SGeorge Wilson }
1240713e232SGeorge Wilson }
1250713e232SGeorge Wilson
1260713e232SGeorge Wilson if (error == 0)
1270713e232SGeorge Wilson VERIFY3U(range_tree_space(rt), ==, space);
1280713e232SGeorge Wilson else
1290713e232SGeorge Wilson range_tree_vacate(rt, NULL, NULL);
130b8493d5dSvl146290
131ecc2d604Sbonwick zio_buf_free(entry_map, bufsize);
1320a4e9518Sgw25295 return (error);
133fa9e4066Sahrens }
134fa9e4066Sahrens
135fa9e4066Sahrens void
space_map_histogram_clear(space_map_t * sm)1360713e232SGeorge Wilson space_map_histogram_clear(space_map_t *sm)
137ecc2d604Sbonwick {
1380713e232SGeorge Wilson if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1390713e232SGeorge Wilson return;
140ecc2d604Sbonwick
1410713e232SGeorge Wilson bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
142ecc2d604Sbonwick }
143ecc2d604Sbonwick
1440713e232SGeorge Wilson boolean_t
space_map_histogram_verify(space_map_t * sm,range_tree_t * rt)1450713e232SGeorge Wilson space_map_histogram_verify(space_map_t *sm, range_tree_t *rt)
146d6e555bdSGeorge Wilson {
1470713e232SGeorge Wilson /*
1480713e232SGeorge Wilson * Verify that the in-core range tree does not have any
1490713e232SGeorge Wilson * ranges smaller than our sm_shift size.
1500713e232SGeorge Wilson */
1510713e232SGeorge Wilson for (int i = 0; i < sm->sm_shift; i++) {
1520713e232SGeorge Wilson if (rt->rt_histogram[i] != 0)
1530713e232SGeorge Wilson return (B_FALSE);
154d6e555bdSGeorge Wilson }
1550713e232SGeorge Wilson return (B_TRUE);
156ecc2d604Sbonwick }
157ecc2d604Sbonwick
158ecc2d604Sbonwick void
space_map_histogram_add(space_map_t * sm,range_tree_t * rt,dmu_tx_t * tx)1590713e232SGeorge Wilson space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
160ecc2d604Sbonwick {
1610713e232SGeorge Wilson int idx = 0;
1620713e232SGeorge Wilson
1630713e232SGeorge Wilson ASSERT(MUTEX_HELD(rt->rt_lock));
1640713e232SGeorge Wilson ASSERT(dmu_tx_is_syncing(tx));
1650713e232SGeorge Wilson VERIFY3U(space_map_object(sm), !=, 0);
1660713e232SGeorge Wilson
1670713e232SGeorge Wilson if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1680713e232SGeorge Wilson return;
1690713e232SGeorge Wilson
1700713e232SGeorge Wilson dmu_buf_will_dirty(sm->sm_dbuf, tx);
1710713e232SGeorge Wilson
1720713e232SGeorge Wilson ASSERT(space_map_histogram_verify(sm, rt));
1730713e232SGeorge Wilson
1740713e232SGeorge Wilson /*
1750713e232SGeorge Wilson * Transfer the content of the range tree histogram to the space
1760713e232SGeorge Wilson * map histogram. The space map histogram contains 32 buckets ranging
1770713e232SGeorge Wilson * between 2^sm_shift to 2^(32+sm_shift-1). The range tree,
1780713e232SGeorge Wilson * however, can represent ranges from 2^0 to 2^63. Since the space
1790713e232SGeorge Wilson * map only cares about allocatable blocks (minimum of sm_shift) we
1800713e232SGeorge Wilson * can safely ignore all ranges in the range tree smaller than sm_shift.
1810713e232SGeorge Wilson */
1820713e232SGeorge Wilson for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
1830713e232SGeorge Wilson
1840713e232SGeorge Wilson /*
1850713e232SGeorge Wilson * Since the largest histogram bucket in the space map is
1860713e232SGeorge Wilson * 2^(32+sm_shift-1), we need to normalize the values in
1870713e232SGeorge Wilson * the range tree for any bucket larger than that size. For
1880713e232SGeorge Wilson * example given an sm_shift of 9, ranges larger than 2^40
1890713e232SGeorge Wilson * would get normalized as if they were 1TB ranges. Assume
1900713e232SGeorge Wilson * the range tree had a count of 5 in the 2^44 (16TB) bucket,
1910713e232SGeorge Wilson * the calculation below would normalize this to 5 * 2^4 (16).
1920713e232SGeorge Wilson */
1930713e232SGeorge Wilson ASSERT3U(i, >=, idx + sm->sm_shift);
1940713e232SGeorge Wilson sm->sm_phys->smp_histogram[idx] +=
1950713e232SGeorge Wilson rt->rt_histogram[i] << (i - idx - sm->sm_shift);
1960713e232SGeorge Wilson
1970713e232SGeorge Wilson /*
1980713e232SGeorge Wilson * Increment the space map's index as long as we haven't
1990713e232SGeorge Wilson * reached the maximum bucket size. Accumulate all ranges
2000713e232SGeorge Wilson * larger than the max bucket size into the last bucket.
2010713e232SGeorge Wilson */
2022e4c9986SGeorge Wilson if (idx < SPACE_MAP_HISTOGRAM_SIZE - 1) {
2030713e232SGeorge Wilson ASSERT3U(idx + sm->sm_shift, ==, i);
2040713e232SGeorge Wilson idx++;
2052e4c9986SGeorge Wilson ASSERT3U(idx, <, SPACE_MAP_HISTOGRAM_SIZE);
2060713e232SGeorge Wilson }
2070713e232SGeorge Wilson }
2080713e232SGeorge Wilson }
2090713e232SGeorge Wilson
2100713e232SGeorge Wilson uint64_t
space_map_entries(space_map_t * sm,range_tree_t * rt)2110713e232SGeorge Wilson space_map_entries(space_map_t *sm, range_tree_t *rt)
2120713e232SGeorge Wilson {
2130713e232SGeorge Wilson avl_tree_t *t = &rt->rt_root;
2140713e232SGeorge Wilson range_seg_t *rs;
2150713e232SGeorge Wilson uint64_t size, entries;
2160713e232SGeorge Wilson
2170713e232SGeorge Wilson /*
2180713e232SGeorge Wilson * All space_maps always have a debug entry so account for it here.
2190713e232SGeorge Wilson */
2200713e232SGeorge Wilson entries = 1;
2210713e232SGeorge Wilson
2220713e232SGeorge Wilson /*
2230713e232SGeorge Wilson * Traverse the range tree and calculate the number of space map
2240713e232SGeorge Wilson * entries that would be required to write out the range tree.
2250713e232SGeorge Wilson */
2260713e232SGeorge Wilson for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
2270713e232SGeorge Wilson size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
2280713e232SGeorge Wilson entries += howmany(size, SM_RUN_MAX);
2290713e232SGeorge Wilson }
2300713e232SGeorge Wilson return (entries);
231ecc2d604Sbonwick }
232ecc2d604Sbonwick
233ecc2d604Sbonwick /*
2340713e232SGeorge Wilson * Note: space_map_write() will drop sm_lock across dmu_write() calls.
235ecc2d604Sbonwick */
236ecc2d604Sbonwick void
space_map_write(space_map_t * sm,range_tree_t * rt,maptype_t maptype,dmu_tx_t * tx)2370713e232SGeorge Wilson space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
2380713e232SGeorge Wilson dmu_tx_t *tx)
239fa9e4066Sahrens {
2400713e232SGeorge Wilson objset_t *os = sm->sm_os;
241fa9e4066Sahrens spa_t *spa = dmu_objset_spa(os);
2420713e232SGeorge Wilson avl_tree_t *t = &rt->rt_root;
2430713e232SGeorge Wilson range_seg_t *rs;
2440713e232SGeorge Wilson uint64_t size, total, rt_space, nodes;
245fa9e4066Sahrens uint64_t *entry, *entry_map, *entry_map_end;
246b1be2892SMatthew Ahrens uint64_t expected_entries, actual_entries = 1;
247fa9e4066Sahrens
2480713e232SGeorge Wilson ASSERT(MUTEX_HELD(rt->rt_lock));
2490713e232SGeorge Wilson ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
2500713e232SGeorge Wilson VERIFY3U(space_map_object(sm), !=, 0);
2510713e232SGeorge Wilson dmu_buf_will_dirty(sm->sm_dbuf, tx);
252fa9e4066Sahrens
2530713e232SGeorge Wilson /*
2540713e232SGeorge Wilson * This field is no longer necessary since the in-core space map
2550713e232SGeorge Wilson * now contains the object number but is maintained for backwards
2560713e232SGeorge Wilson * compatibility.
2570713e232SGeorge Wilson */
2580713e232SGeorge Wilson sm->sm_phys->smp_object = sm->sm_object;
2590713e232SGeorge Wilson
2600713e232SGeorge Wilson if (range_tree_space(rt) == 0) {
2610713e232SGeorge Wilson VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object);
262fa9e4066Sahrens return;
2630713e232SGeorge Wilson }
264fa9e4066Sahrens
265ecc2d604Sbonwick if (maptype == SM_ALLOC)
2660713e232SGeorge Wilson sm->sm_phys->smp_alloc += range_tree_space(rt);
267ecc2d604Sbonwick else
2680713e232SGeorge Wilson sm->sm_phys->smp_alloc -= range_tree_space(rt);
269ecc2d604Sbonwick
2700713e232SGeorge Wilson expected_entries = space_map_entries(sm, rt);
2710713e232SGeorge Wilson
2720713e232SGeorge Wilson entry_map = zio_buf_alloc(sm->sm_blksz);
2730713e232SGeorge Wilson entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
274fa9e4066Sahrens entry = entry_map;
275fa9e4066Sahrens
276fa9e4066Sahrens *entry++ = SM_DEBUG_ENCODE(1) |
277fa9e4066Sahrens SM_DEBUG_ACTION_ENCODE(maptype) |
278fa9e4066Sahrens SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
279fa9e4066Sahrens SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
280fa9e4066Sahrens
28116a4a807SGeorge Wilson total = 0;
2820713e232SGeorge Wilson nodes = avl_numnodes(&rt->rt_root);
2830713e232SGeorge Wilson rt_space = range_tree_space(rt);
2840713e232SGeorge Wilson for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
2850713e232SGeorge Wilson uint64_t start;
286fa9e4066Sahrens
2870713e232SGeorge Wilson size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
2880713e232SGeorge Wilson start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
289fa9e4066Sahrens
2900713e232SGeorge Wilson total += size << sm->sm_shift;
2910713e232SGeorge Wilson
2920713e232SGeorge Wilson while (size != 0) {
2930713e232SGeorge Wilson uint64_t run_len;
2940713e232SGeorge Wilson
295fa9e4066Sahrens run_len = MIN(size, SM_RUN_MAX);
296fa9e4066Sahrens
297fa9e4066Sahrens if (entry == entry_map_end) {
2980713e232SGeorge Wilson mutex_exit(rt->rt_lock);
2990713e232SGeorge Wilson dmu_write(os, space_map_object(sm),
3000713e232SGeorge Wilson sm->sm_phys->smp_objsize, sm->sm_blksz,
3010713e232SGeorge Wilson entry_map, tx);
3020713e232SGeorge Wilson mutex_enter(rt->rt_lock);
3030713e232SGeorge Wilson sm->sm_phys->smp_objsize += sm->sm_blksz;
304fa9e4066Sahrens entry = entry_map;
305fa9e4066Sahrens }
306fa9e4066Sahrens
307fa9e4066Sahrens *entry++ = SM_OFFSET_ENCODE(start) |
308fa9e4066Sahrens SM_TYPE_ENCODE(maptype) |
309fa9e4066Sahrens SM_RUN_ENCODE(run_len);
310fa9e4066Sahrens
311fa9e4066Sahrens start += run_len;
312fa9e4066Sahrens size -= run_len;
3130713e232SGeorge Wilson actual_entries++;
314fa9e4066Sahrens }
315fa9e4066Sahrens }
316fa9e4066Sahrens
317fa9e4066Sahrens if (entry != entry_map) {
318fa9e4066Sahrens size = (entry - entry_map) * sizeof (uint64_t);
3190713e232SGeorge Wilson mutex_exit(rt->rt_lock);
3200713e232SGeorge Wilson dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
321fa9e4066Sahrens size, entry_map, tx);
3220713e232SGeorge Wilson mutex_enter(rt->rt_lock);
3230713e232SGeorge Wilson sm->sm_phys->smp_objsize += size;
324fa9e4066Sahrens }
3250713e232SGeorge Wilson ASSERT3U(expected_entries, ==, actual_entries);
326fa9e4066Sahrens
32701f55e48SGeorge Wilson /*
32801f55e48SGeorge Wilson * Ensure that the space_map's accounting wasn't changed
32901f55e48SGeorge Wilson * while we were in the middle of writing it out.
33001f55e48SGeorge Wilson */
3310713e232SGeorge Wilson VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
3320713e232SGeorge Wilson VERIFY3U(range_tree_space(rt), ==, rt_space);
3330713e232SGeorge Wilson VERIFY3U(range_tree_space(rt), ==, total);
33401f55e48SGeorge Wilson
3350713e232SGeorge Wilson zio_buf_free(entry_map, sm->sm_blksz);
336fa9e4066Sahrens }
337fa9e4066Sahrens
3388ad4d6ddSJeff Bonwick static int
space_map_open_impl(space_map_t * sm)3390713e232SGeorge Wilson space_map_open_impl(space_map_t *sm)
3408ad4d6ddSJeff Bonwick {
3410713e232SGeorge Wilson int error;
3420713e232SGeorge Wilson u_longlong_t blocks;
3438ad4d6ddSJeff Bonwick
3440713e232SGeorge Wilson error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf);
3450713e232SGeorge Wilson if (error)
3460713e232SGeorge Wilson return (error);
3478ad4d6ddSJeff Bonwick
3480713e232SGeorge Wilson dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
3490713e232SGeorge Wilson sm->sm_phys = sm->sm_dbuf->db_data;
3500713e232SGeorge Wilson return (0);
3510713e232SGeorge Wilson }
3520713e232SGeorge Wilson
3530713e232SGeorge Wilson int
space_map_open(space_map_t ** smp,objset_t * os,uint64_t object,uint64_t start,uint64_t size,uint8_t shift,kmutex_t * lp)3540713e232SGeorge Wilson space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
3550713e232SGeorge Wilson uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp)
3560713e232SGeorge Wilson {
3570713e232SGeorge Wilson space_map_t *sm;
3580713e232SGeorge Wilson int error;
3590713e232SGeorge Wilson
3600713e232SGeorge Wilson ASSERT(*smp == NULL);
3610713e232SGeorge Wilson ASSERT(os != NULL);
3620713e232SGeorge Wilson ASSERT(object != 0);
3630713e232SGeorge Wilson
3640713e232SGeorge Wilson sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
3650713e232SGeorge Wilson
3660713e232SGeorge Wilson sm->sm_start = start;
3670713e232SGeorge Wilson sm->sm_size = size;
3680713e232SGeorge Wilson sm->sm_shift = shift;
3690713e232SGeorge Wilson sm->sm_lock = lp;
3700713e232SGeorge Wilson sm->sm_os = os;
3710713e232SGeorge Wilson sm->sm_object = object;
3720713e232SGeorge Wilson
3730713e232SGeorge Wilson error = space_map_open_impl(sm);
3740713e232SGeorge Wilson if (error != 0) {
3750713e232SGeorge Wilson space_map_close(sm);
3760713e232SGeorge Wilson return (error);
3770713e232SGeorge Wilson }
3780713e232SGeorge Wilson
3790713e232SGeorge Wilson *smp = sm;
3808ad4d6ddSJeff Bonwick
3818ad4d6ddSJeff Bonwick return (0);
3828ad4d6ddSJeff Bonwick }
3838ad4d6ddSJeff Bonwick
3848ad4d6ddSJeff Bonwick void
space_map_close(space_map_t * sm)3850713e232SGeorge Wilson space_map_close(space_map_t *sm)
3868ad4d6ddSJeff Bonwick {
3870713e232SGeorge Wilson if (sm == NULL)
3880713e232SGeorge Wilson return;
3898ad4d6ddSJeff Bonwick
3900713e232SGeorge Wilson if (sm->sm_dbuf != NULL)
3910713e232SGeorge Wilson dmu_buf_rele(sm->sm_dbuf, sm);
3920713e232SGeorge Wilson sm->sm_dbuf = NULL;
3930713e232SGeorge Wilson sm->sm_phys = NULL;
3948ad4d6ddSJeff Bonwick
3950713e232SGeorge Wilson kmem_free(sm, sizeof (*sm));
3968ad4d6ddSJeff Bonwick }
3978ad4d6ddSJeff Bonwick
3988ad4d6ddSJeff Bonwick void
space_map_truncate(space_map_t * sm,dmu_tx_t * tx)3990713e232SGeorge Wilson space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
4008ad4d6ddSJeff Bonwick {
4010713e232SGeorge Wilson objset_t *os = sm->sm_os;
4020713e232SGeorge Wilson spa_t *spa = dmu_objset_spa(os);
4030713e232SGeorge Wilson dmu_object_info_t doi;
4048ad4d6ddSJeff Bonwick
4050713e232SGeorge Wilson ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
4060713e232SGeorge Wilson ASSERT(dmu_tx_is_syncing(tx));
4078ad4d6ddSJeff Bonwick
4080713e232SGeorge Wilson dmu_object_info_from_db(sm->sm_dbuf, &doi);
4098ad4d6ddSJeff Bonwick
410b1be2892SMatthew Ahrens /*
411b1be2892SMatthew Ahrens * If the space map has the wrong bonus size (because
412b1be2892SMatthew Ahrens * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
413b1be2892SMatthew Ahrens * the wrong block size (because space_map_blksz has changed),
414b1be2892SMatthew Ahrens * free and re-allocate its object with the updated sizes.
415b1be2892SMatthew Ahrens *
416b1be2892SMatthew Ahrens * Otherwise, just truncate the current object.
417b1be2892SMatthew Ahrens */
418b1be2892SMatthew Ahrens if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
419b1be2892SMatthew Ahrens doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
420b1be2892SMatthew Ahrens doi.doi_data_block_size != space_map_blksz) {
4210713e232SGeorge Wilson zfs_dbgmsg("txg %llu, spa %s, reallocating: "
4220713e232SGeorge Wilson "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
4230713e232SGeorge Wilson spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
424b1be2892SMatthew Ahrens
425b1be2892SMatthew Ahrens space_map_free(sm, tx);
426b1be2892SMatthew Ahrens dmu_buf_rele(sm->sm_dbuf, sm);
427b1be2892SMatthew Ahrens
428b1be2892SMatthew Ahrens sm->sm_object = space_map_alloc(sm->sm_os, tx);
429b1be2892SMatthew Ahrens VERIFY0(space_map_open_impl(sm));
430b1be2892SMatthew Ahrens } else {
431b1be2892SMatthew Ahrens VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
432b1be2892SMatthew Ahrens
433b1be2892SMatthew Ahrens /*
434b1be2892SMatthew Ahrens * If the spacemap is reallocated, its histogram
435b1be2892SMatthew Ahrens * will be reset. Do the same in the common case so that
436b1be2892SMatthew Ahrens * bugs related to the uncommon case do not go unnoticed.
437b1be2892SMatthew Ahrens */
438b1be2892SMatthew Ahrens bzero(sm->sm_phys->smp_histogram,
439b1be2892SMatthew Ahrens sizeof (sm->sm_phys->smp_histogram));
4400713e232SGeorge Wilson }
4410713e232SGeorge Wilson
4420713e232SGeorge Wilson dmu_buf_will_dirty(sm->sm_dbuf, tx);
4430713e232SGeorge Wilson sm->sm_phys->smp_objsize = 0;
4440713e232SGeorge Wilson sm->sm_phys->smp_alloc = 0;
4450713e232SGeorge Wilson }
4460713e232SGeorge Wilson
4470713e232SGeorge Wilson /*
4480713e232SGeorge Wilson * Update the in-core space_map allocation and length values.
4490713e232SGeorge Wilson */
4500713e232SGeorge Wilson void
space_map_update(space_map_t * sm)4510713e232SGeorge Wilson space_map_update(space_map_t *sm)
4520713e232SGeorge Wilson {
4530713e232SGeorge Wilson if (sm == NULL)
4540713e232SGeorge Wilson return;
4550713e232SGeorge Wilson
4560713e232SGeorge Wilson ASSERT(MUTEX_HELD(sm->sm_lock));
4570713e232SGeorge Wilson
4580713e232SGeorge Wilson sm->sm_alloc = sm->sm_phys->smp_alloc;
4590713e232SGeorge Wilson sm->sm_length = sm->sm_phys->smp_objsize;
4600713e232SGeorge Wilson }
4610713e232SGeorge Wilson
4620713e232SGeorge Wilson uint64_t
space_map_alloc(objset_t * os,dmu_tx_t * tx)4630713e232SGeorge Wilson space_map_alloc(objset_t *os, dmu_tx_t *tx)
4640713e232SGeorge Wilson {
4650713e232SGeorge Wilson spa_t *spa = dmu_objset_spa(os);
4660713e232SGeorge Wilson uint64_t object;
4670713e232SGeorge Wilson int bonuslen;
4680713e232SGeorge Wilson
4692acef22dSMatthew Ahrens if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
4702acef22dSMatthew Ahrens spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
4710713e232SGeorge Wilson bonuslen = sizeof (space_map_phys_t);
4720713e232SGeorge Wilson ASSERT3U(bonuslen, <=, dmu_bonus_max());
4730713e232SGeorge Wilson } else {
4740713e232SGeorge Wilson bonuslen = SPACE_MAP_SIZE_V0;
4750713e232SGeorge Wilson }
4760713e232SGeorge Wilson
4770713e232SGeorge Wilson object = dmu_object_alloc(os,
478b1be2892SMatthew Ahrens DMU_OT_SPACE_MAP, space_map_blksz,
4790713e232SGeorge Wilson DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
4800713e232SGeorge Wilson
4810713e232SGeorge Wilson return (object);
4820713e232SGeorge Wilson }
4830713e232SGeorge Wilson
4840713e232SGeorge Wilson void
space_map_free(space_map_t * sm,dmu_tx_t * tx)4850713e232SGeorge Wilson space_map_free(space_map_t *sm, dmu_tx_t *tx)
4860713e232SGeorge Wilson {
4870713e232SGeorge Wilson spa_t *spa;
4880713e232SGeorge Wilson
4890713e232SGeorge Wilson if (sm == NULL)
4900713e232SGeorge Wilson return;
4910713e232SGeorge Wilson
4920713e232SGeorge Wilson spa = dmu_objset_spa(sm->sm_os);
4932acef22dSMatthew Ahrens if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
4940713e232SGeorge Wilson dmu_object_info_t doi;
4950713e232SGeorge Wilson
4960713e232SGeorge Wilson dmu_object_info_from_db(sm->sm_dbuf, &doi);
4970713e232SGeorge Wilson if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
4982acef22dSMatthew Ahrens VERIFY(spa_feature_is_active(spa,
4992acef22dSMatthew Ahrens SPA_FEATURE_SPACEMAP_HISTOGRAM));
5002acef22dSMatthew Ahrens spa_feature_decr(spa,
5012acef22dSMatthew Ahrens SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
5028ad4d6ddSJeff Bonwick }
5038ad4d6ddSJeff Bonwick }
5040713e232SGeorge Wilson
5050713e232SGeorge Wilson VERIFY3U(dmu_object_free(sm->sm_os, space_map_object(sm), tx), ==, 0);
5060713e232SGeorge Wilson sm->sm_object = 0;
5078ad4d6ddSJeff Bonwick }
5080713e232SGeorge Wilson
5090713e232SGeorge Wilson uint64_t
space_map_object(space_map_t * sm)5100713e232SGeorge Wilson space_map_object(space_map_t *sm)
5110713e232SGeorge Wilson {
5120713e232SGeorge Wilson return (sm != NULL ? sm->sm_object : 0);
5130713e232SGeorge Wilson }
5140713e232SGeorge Wilson
5150713e232SGeorge Wilson /*
5160713e232SGeorge Wilson * Returns the already synced, on-disk allocated space.
5170713e232SGeorge Wilson */
5180713e232SGeorge Wilson uint64_t
space_map_allocated(space_map_t * sm)5190713e232SGeorge Wilson space_map_allocated(space_map_t *sm)
5200713e232SGeorge Wilson {
5210713e232SGeorge Wilson return (sm != NULL ? sm->sm_alloc : 0);
5220713e232SGeorge Wilson }
5230713e232SGeorge Wilson
5240713e232SGeorge Wilson /*
5250713e232SGeorge Wilson * Returns the already synced, on-disk length;
5260713e232SGeorge Wilson */
5270713e232SGeorge Wilson uint64_t
space_map_length(space_map_t * sm)5280713e232SGeorge Wilson space_map_length(space_map_t *sm)
5290713e232SGeorge Wilson {
5300713e232SGeorge Wilson return (sm != NULL ? sm->sm_length : 0);
5310713e232SGeorge Wilson }
5320713e232SGeorge Wilson
5330713e232SGeorge Wilson /*
5340713e232SGeorge Wilson * Returns the allocated space that is currently syncing.
5350713e232SGeorge Wilson */
5360713e232SGeorge Wilson int64_t
space_map_alloc_delta(space_map_t * sm)5370713e232SGeorge Wilson space_map_alloc_delta(space_map_t *sm)
5380713e232SGeorge Wilson {
5390713e232SGeorge Wilson if (sm == NULL)
5400713e232SGeorge Wilson return (0);
5410713e232SGeorge Wilson ASSERT(sm->sm_dbuf != NULL);
5420713e232SGeorge Wilson return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
5438ad4d6ddSJeff Bonwick }
544