18c2f7e86SDan Williams /* 28c2f7e86SDan Williams * Block Translation Table library 38c2f7e86SDan Williams * Copyright (c) 2014-2015, Intel Corporation. 48c2f7e86SDan Williams * 58c2f7e86SDan Williams * This program is free software; you can redistribute it and/or modify it 68c2f7e86SDan Williams * under the terms and conditions of the GNU General Public License, 78c2f7e86SDan Williams * version 2, as published by the Free Software Foundation. 88c2f7e86SDan Williams * 98c2f7e86SDan Williams * This program is distributed in the hope it will be useful, but WITHOUT 108c2f7e86SDan Williams * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 118c2f7e86SDan Williams * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 128c2f7e86SDan Williams * more details. 138c2f7e86SDan Williams */ 148c2f7e86SDan Williams 158c2f7e86SDan Williams #ifndef _LINUX_BTT_H 168c2f7e86SDan Williams #define _LINUX_BTT_H 178c2f7e86SDan Williams 18d9b83c75SVishal Verma #include <linux/badblocks.h> 198c2f7e86SDan Williams #include <linux/types.h> 208c2f7e86SDan Williams 218c2f7e86SDan Williams #define BTT_SIG_LEN 16 228c2f7e86SDan Williams #define BTT_SIG "BTT_ARENA_INFO\0" 235212e11fSVishal Verma #define MAP_ENT_SIZE 4 245212e11fSVishal Verma #define MAP_TRIM_SHIFT 31 255212e11fSVishal Verma #define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT) 265212e11fSVishal Verma #define MAP_ERR_SHIFT 30 275212e11fSVishal Verma #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) 285212e11fSVishal Verma #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) 295212e11fSVishal Verma #define MAP_ENT_NORMAL 0xC0000000 30*24e3a7fbSVishal Verma #define LOG_GRP_SIZE sizeof(struct log_group) 315212e11fSVishal Verma #define LOG_ENT_SIZE sizeof(struct log_entry) 325212e11fSVishal Verma #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ 335212e11fSVishal Verma #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ 345212e11fSVishal Verma #define RTT_VALID (1UL << 31) 355212e11fSVishal Verma #define RTT_INVALID 0 365212e11fSVishal Verma #define BTT_PG_SIZE 4096 375212e11fSVishal Verma #define BTT_DEFAULT_NFREE ND_MAX_LANES 385212e11fSVishal Verma #define LOG_SEQ_INIT 1 395212e11fSVishal Verma 405212e11fSVishal Verma #define IB_FLAG_ERROR 0x00000001 415212e11fSVishal Verma #define IB_FLAG_ERROR_MASK 0x00000001 425212e11fSVishal Verma 430595d539SVishal Verma #define ent_lba(ent) (ent & MAP_LBA_MASK) 440595d539SVishal Verma #define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK)) 450595d539SVishal Verma #define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK)) 46d9b83c75SVishal Verma #define set_e_flag(ent) (ent |= MAP_ERR_MASK) 470595d539SVishal Verma 485212e11fSVishal Verma enum btt_init_state { 495212e11fSVishal Verma INIT_UNCHECKED = 0, 505212e11fSVishal Verma INIT_NOTFOUND, 515212e11fSVishal Verma INIT_READY 525212e11fSVishal Verma }; 535212e11fSVishal Verma 54*24e3a7fbSVishal Verma /* 55*24e3a7fbSVishal Verma * A log group represents one log 'lane', and consists of four log entries. 56*24e3a7fbSVishal Verma * Two of the four entries are valid entries, and the remaining two are 57*24e3a7fbSVishal Verma * padding. Due to an old bug in the padding location, we need to perform a 58*24e3a7fbSVishal Verma * test to determine the padding scheme being used, and use that scheme 59*24e3a7fbSVishal Verma * thereafter. 60*24e3a7fbSVishal Verma * 61*24e3a7fbSVishal Verma * In kernels prior to 4.15, 'log group' would have actual log entries at 62*24e3a7fbSVishal Verma * indices (0, 2) and padding at indices (1, 3), where as the correct/updated 63*24e3a7fbSVishal Verma * format has log entries at indices (0, 1) and padding at indices (2, 3). 64*24e3a7fbSVishal Verma * 65*24e3a7fbSVishal Verma * Old (pre 4.15) format: 66*24e3a7fbSVishal Verma * +-----------------+-----------------+ 67*24e3a7fbSVishal Verma * | ent[0] | ent[1] | 68*24e3a7fbSVishal Verma * | 16B | 16B | 69*24e3a7fbSVishal Verma * | lba/old/new/seq | pad | 70*24e3a7fbSVishal Verma * +-----------------------------------+ 71*24e3a7fbSVishal Verma * | ent[2] | ent[3] | 72*24e3a7fbSVishal Verma * | 16B | 16B | 73*24e3a7fbSVishal Verma * | lba/old/new/seq | pad | 74*24e3a7fbSVishal Verma * +-----------------+-----------------+ 75*24e3a7fbSVishal Verma * 76*24e3a7fbSVishal Verma * New format: 77*24e3a7fbSVishal Verma * +-----------------+-----------------+ 78*24e3a7fbSVishal Verma * | ent[0] | ent[1] | 79*24e3a7fbSVishal Verma * | 16B | 16B | 80*24e3a7fbSVishal Verma * | lba/old/new/seq | lba/old/new/seq | 81*24e3a7fbSVishal Verma * +-----------------------------------+ 82*24e3a7fbSVishal Verma * | ent[2] | ent[3] | 83*24e3a7fbSVishal Verma * | 16B | 16B | 84*24e3a7fbSVishal Verma * | pad | pad | 85*24e3a7fbSVishal Verma * +-----------------+-----------------+ 86*24e3a7fbSVishal Verma * 87*24e3a7fbSVishal Verma * We detect during start-up which format is in use, and set 88*24e3a7fbSVishal Verma * arena->log_index[(0, 1)] with the detected format. 89*24e3a7fbSVishal Verma */ 90*24e3a7fbSVishal Verma 915212e11fSVishal Verma struct log_entry { 925212e11fSVishal Verma __le32 lba; 935212e11fSVishal Verma __le32 old_map; 945212e11fSVishal Verma __le32 new_map; 955212e11fSVishal Verma __le32 seq; 96*24e3a7fbSVishal Verma }; 97*24e3a7fbSVishal Verma 98*24e3a7fbSVishal Verma struct log_group { 99*24e3a7fbSVishal Verma struct log_entry ent[4]; 1005212e11fSVishal Verma }; 1018c2f7e86SDan Williams 1028c2f7e86SDan Williams struct btt_sb { 1038c2f7e86SDan Williams u8 signature[BTT_SIG_LEN]; 1048c2f7e86SDan Williams u8 uuid[16]; 1058c2f7e86SDan Williams u8 parent_uuid[16]; 1068c2f7e86SDan Williams __le32 flags; 1078c2f7e86SDan Williams __le16 version_major; 1088c2f7e86SDan Williams __le16 version_minor; 1098c2f7e86SDan Williams __le32 external_lbasize; 1108c2f7e86SDan Williams __le32 external_nlba; 1118c2f7e86SDan Williams __le32 internal_lbasize; 1128c2f7e86SDan Williams __le32 internal_nlba; 1138c2f7e86SDan Williams __le32 nfree; 1148c2f7e86SDan Williams __le32 infosize; 1158c2f7e86SDan Williams __le64 nextoff; 1168c2f7e86SDan Williams __le64 dataoff; 1178c2f7e86SDan Williams __le64 mapoff; 1188c2f7e86SDan Williams __le64 logoff; 1198c2f7e86SDan Williams __le64 info2off; 1208c2f7e86SDan Williams u8 padding[3968]; 1218c2f7e86SDan Williams __le64 checksum; 1228c2f7e86SDan Williams }; 1238c2f7e86SDan Williams 1245212e11fSVishal Verma struct free_entry { 1255212e11fSVishal Verma u32 block; 1265212e11fSVishal Verma u8 sub; 1275212e11fSVishal Verma u8 seq; 128d9b83c75SVishal Verma u8 has_err; 1295212e11fSVishal Verma }; 1305212e11fSVishal Verma 1315212e11fSVishal Verma struct aligned_lock { 1325212e11fSVishal Verma union { 1335212e11fSVishal Verma spinlock_t lock; 1345212e11fSVishal Verma u8 cacheline_padding[L1_CACHE_BYTES]; 1355212e11fSVishal Verma }; 1365212e11fSVishal Verma }; 1375212e11fSVishal Verma 1385212e11fSVishal Verma /** 1395212e11fSVishal Verma * struct arena_info - handle for an arena 1405212e11fSVishal Verma * @size: Size in bytes this arena occupies on the raw device. 1415212e11fSVishal Verma * This includes arena metadata. 1425212e11fSVishal Verma * @external_lba_start: The first external LBA in this arena. 1435212e11fSVishal Verma * @internal_nlba: Number of internal blocks available in the arena 1445212e11fSVishal Verma * including nfree reserved blocks 1455212e11fSVishal Verma * @internal_lbasize: Internal and external lba sizes may be different as 1465212e11fSVishal Verma * we can round up 'odd' external lbasizes such as 520B 1475212e11fSVishal Verma * to be aligned. 1485212e11fSVishal Verma * @external_nlba: Number of blocks contributed by the arena to the number 1495212e11fSVishal Verma * reported to upper layers. (internal_nlba - nfree) 1505212e11fSVishal Verma * @external_lbasize: LBA size as exposed to upper layers. 1515212e11fSVishal Verma * @nfree: A reserve number of 'free' blocks that is used to 1525212e11fSVishal Verma * handle incoming writes. 1535212e11fSVishal Verma * @version_major: Metadata layout version major. 1545212e11fSVishal Verma * @version_minor: Metadata layout version minor. 15575892004SVishal Verma * @sector_size: The Linux sector size - 512 or 4096 1565212e11fSVishal Verma * @nextoff: Offset in bytes to the start of the next arena. 1575212e11fSVishal Verma * @infooff: Offset in bytes to the info block of this arena. 1585212e11fSVishal Verma * @dataoff: Offset in bytes to the data area of this arena. 1595212e11fSVishal Verma * @mapoff: Offset in bytes to the map area of this arena. 1605212e11fSVishal Verma * @logoff: Offset in bytes to the log area of this arena. 1615212e11fSVishal Verma * @info2off: Offset in bytes to the backup info block of this arena. 1625212e11fSVishal Verma * @freelist: Pointer to in-memory list of free blocks 1635212e11fSVishal Verma * @rtt: Pointer to in-memory "Read Tracking Table" 1645212e11fSVishal Verma * @map_locks: Spinlocks protecting concurrent map writes 1655212e11fSVishal Verma * @nd_btt: Pointer to parent nd_btt structure. 1665212e11fSVishal Verma * @list: List head for list of arenas 1675212e11fSVishal Verma * @debugfs_dir: Debugfs dentry 1685212e11fSVishal Verma * @flags: Arena flags - may signify error states. 16913b7954cSVishal Verma * @err_lock: Mutex for synchronizing error clearing. 170*24e3a7fbSVishal Verma * @log_index: Indices of the valid log entries in a log_group 1715212e11fSVishal Verma * 1725212e11fSVishal Verma * arena_info is a per-arena handle. Once an arena is narrowed down for an 1735212e11fSVishal Verma * IO, this struct is passed around for the duration of the IO. 1745212e11fSVishal Verma */ 1755212e11fSVishal Verma struct arena_info { 1765212e11fSVishal Verma u64 size; /* Total bytes for this arena */ 1775212e11fSVishal Verma u64 external_lba_start; 1785212e11fSVishal Verma u32 internal_nlba; 1795212e11fSVishal Verma u32 internal_lbasize; 1805212e11fSVishal Verma u32 external_nlba; 1815212e11fSVishal Verma u32 external_lbasize; 1825212e11fSVishal Verma u32 nfree; 1835212e11fSVishal Verma u16 version_major; 1845212e11fSVishal Verma u16 version_minor; 18575892004SVishal Verma u32 sector_size; 1865212e11fSVishal Verma /* Byte offsets to the different on-media structures */ 1875212e11fSVishal Verma u64 nextoff; 1885212e11fSVishal Verma u64 infooff; 1895212e11fSVishal Verma u64 dataoff; 1905212e11fSVishal Verma u64 mapoff; 1915212e11fSVishal Verma u64 logoff; 1925212e11fSVishal Verma u64 info2off; 1935212e11fSVishal Verma /* Pointers to other in-memory structures for this arena */ 1945212e11fSVishal Verma struct free_entry *freelist; 1955212e11fSVishal Verma u32 *rtt; 1965212e11fSVishal Verma struct aligned_lock *map_locks; 1975212e11fSVishal Verma struct nd_btt *nd_btt; 1985212e11fSVishal Verma struct list_head list; 1995212e11fSVishal Verma struct dentry *debugfs_dir; 2005212e11fSVishal Verma /* Arena flags */ 2015212e11fSVishal Verma u32 flags; 202d9b83c75SVishal Verma struct mutex err_lock; 203*24e3a7fbSVishal Verma int log_index[2]; 2045212e11fSVishal Verma }; 2055212e11fSVishal Verma 2065212e11fSVishal Verma /** 2075212e11fSVishal Verma * struct btt - handle for a BTT instance 2085212e11fSVishal Verma * @btt_disk: Pointer to the gendisk for BTT device 2095212e11fSVishal Verma * @btt_queue: Pointer to the request queue for the BTT device 2105212e11fSVishal Verma * @arena_list: Head of the list of arenas 2115212e11fSVishal Verma * @debugfs_dir: Debugfs dentry 2125212e11fSVishal Verma * @nd_btt: Parent nd_btt struct 2135212e11fSVishal Verma * @nlba: Number of logical blocks exposed to the upper layers 2145212e11fSVishal Verma * after removing the amount of space needed by metadata 2155212e11fSVishal Verma * @rawsize: Total size in bytes of the available backing device 2165212e11fSVishal Verma * @lbasize: LBA size as requested and presented to upper layers. 2175212e11fSVishal Verma * This is sector_size + size of any metadata. 2185212e11fSVishal Verma * @sector_size: The Linux sector size - 512 or 4096 2195212e11fSVishal Verma * @lanes: Per-lane spinlocks 2205212e11fSVishal Verma * @init_lock: Mutex used for the BTT initialization 2215212e11fSVishal Verma * @init_state: Flag describing the initialization state for the BTT 2225212e11fSVishal Verma * @num_arenas: Number of arenas in the BTT instance 22313b7954cSVishal Verma * @phys_bb: Pointer to the namespace's badblocks structure 2245212e11fSVishal Verma */ 2255212e11fSVishal Verma struct btt { 2265212e11fSVishal Verma struct gendisk *btt_disk; 2275212e11fSVishal Verma struct request_queue *btt_queue; 2285212e11fSVishal Verma struct list_head arena_list; 2295212e11fSVishal Verma struct dentry *debugfs_dir; 2305212e11fSVishal Verma struct nd_btt *nd_btt; 2315212e11fSVishal Verma u64 nlba; 2325212e11fSVishal Verma unsigned long long rawsize; 2335212e11fSVishal Verma u32 lbasize; 2345212e11fSVishal Verma u32 sector_size; 2355212e11fSVishal Verma struct nd_region *nd_region; 2365212e11fSVishal Verma struct mutex init_lock; 2375212e11fSVishal Verma int init_state; 2385212e11fSVishal Verma int num_arenas; 239d9b83c75SVishal Verma struct badblocks *phys_bb; 2405212e11fSVishal Verma }; 241ab45e763SVishal Verma 242ab45e763SVishal Verma bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); 24314e49454SVishal Verma int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, 24414e49454SVishal Verma struct btt_sb *btt_sb); 245ab45e763SVishal Verma 2468c2f7e86SDan Williams #endif 247