1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_METASLAB_IMPL_H 28 #define _SYS_METASLAB_IMPL_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/metaslab.h> 33 #include <sys/space_map.h> 34 #include <sys/vdev.h> 35 #include <sys/txg.h> 36 #include <sys/avl.h> 37 38 #ifdef __cplusplus 39 extern "C" { 40 #endif 41 42 struct metaslab_class { 43 metaslab_group_t *mc_rotor; 44 uint64_t mc_allocated; 45 }; 46 47 struct metaslab_group { 48 kmutex_t mg_lock; 49 avl_tree_t mg_metaslab_tree; 50 uint64_t mg_aliquot; 51 int64_t mg_bias; 52 metaslab_class_t *mg_class; 53 vdev_t *mg_vd; 54 metaslab_group_t *mg_prev; 55 metaslab_group_t *mg_next; 56 }; 57 58 /* 59 * Each metaslab's free block list is kept in its own DMU object in the 60 * metaslab freelist dataset. To minimize space consumption, the list 61 * is circular. 62 * 63 * Allocations and frees can happen in multiple transaction groups at 64 * the same time, which makes it a bit challening to keep the metaslab 65 * consistent. For example, we cannot allow frees from different 66 * transaction groups to be interleaved in the metaslab's free block list. 67 * 68 * We address this in several ways: 69 * 70 * We don't allow allocations from the same metaslab in concurrent 71 * transaction groups. metaslab_alloc() enforces this by checking 72 * the ms_last_alloc field, which specifies the last txg in which 73 * the metaslab was used for allocations. 74 * 75 * We can't segregate frees this way because we can't choose which 76 * DVAs someone wants to free. So we keep separate in-core freelists 77 * for each active transaction group. This in-core data is only 78 * written to the metaslab's on-disk freelist in metaslab_sync(), 79 * which solves the interleave problem: we only append frees from 80 * the syncing txg to the on-disk freelist, so the appends all occur 81 * in txg order. 82 * 83 * We cannot allow a block which was freed in a given txg to be 84 * allocated again until that txg has closed; otherwise, if we 85 * failed to sync that txg and had to roll back to txg - 1, 86 * changes in txg + 1 could have overwritten the data. Therefore, 87 * we partition the free blocks into "available" and "limbo" states. 88 * A block is available if the txg in which it was freed has closed; 89 * until then, the block is in limbo. Each time metaslab_sync() runs, 90 * if first adds any limbo blocks to the avail list, clears the limbo 91 * list, and starts writing the new limbo blocks (i.e. the ones that 92 * were freed in the syncing txg). 93 */ 94 95 struct metaslab { 96 kmutex_t ms_lock; /* metaslab lock */ 97 space_map_obj_t *ms_smo; /* space map object */ 98 uint64_t ms_last_alloc; /* txg of last alloc */ 99 uint64_t ms_usable_end; /* end of free_obj at last sync */ 100 uint64_t ms_usable_space; /* usable space at last sync */ 101 metaslab_group_t *ms_group; /* metaslab group */ 102 avl_node_t ms_group_node; /* node in metaslab group tree */ 103 uint64_t ms_weight; /* weight vs. others in group */ 104 uint8_t ms_dirty[TXG_SIZE]; /* per-txg dirty flags */ 105 space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */ 106 space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */ 107 txg_node_t ms_txg_node; /* per-txg dirty metaslab links */ 108 space_map_t ms_map; /* in-core free space map */ 109 uint8_t ms_map_incore; /* space map contents are valid */ 110 uint64_t ms_map_cursor[SPA_ASIZEBITS]; /* XXX -- PPD */ 111 }; 112 113 /* 114 * ms_dirty[] flags 115 */ 116 #define MSD_ALLOC 0x01 /* allocated from in this txg */ 117 #define MSD_FREE 0x02 /* freed to in this txg */ 118 #define MSD_ADD 0x04 /* added to the pool in this txg */ 119 #define MSD_CONDENSE 0x08 /* condensed in this txg */ 120 121 #ifdef __cplusplus 122 } 123 #endif 124 125 #endif /* _SYS_METASLAB_IMPL_H */ 126