1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _SYS_IB_ADAPTERS_TAVOR_MR_H 27 #define _SYS_IB_ADAPTERS_TAVOR_MR_H 28 29 /* 30 * tavor_mr.h 31 * Contains all of the prototypes, #defines, and structures necessary 32 * for the Tavor Memory Region/Window routines. 33 * Specifically it contains #defines, macros, and prototypes for each of 34 * the required memory region/window verbs that can be accessed through 35 * the IBTF's CI interfaces. In particular each of the prototypes defined 36 * below is called from a corresponding CI interface routine (as specified 37 * in the tavor_ci.c file). 38 */ 39 40 #include <sys/types.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 45 #ifdef __cplusplus 46 extern "C" { 47 #endif 48 49 /* 50 * The following defines specify the default number of MPT entries and their 51 * individual entry size. Settings exist for the supported DDR DIMM sizes of 52 * 128MB and 256MB. If a DIMM greater than 256 is found, then the 256MB 53 * profile is used. See tavor_cfg.c for more discussion on config profiles. 54 * 55 * For manual configuration (not using config profiles), this value is 56 * controllable through the "tavor_log_num_mpt" configuration variable. To 57 * override config profile settings the 'tavor_alt_config_enable' configuration 58 * variable must first be set. 59 */ 60 #define TAVOR_NUM_MPT_SHIFT_128 0x14 61 #define TAVOR_NUM_MPT_SHIFT_256 0x15 62 #define TAVOR_MPT_SIZE_SHIFT 0x6 63 #define TAVOR_MPT_SIZE (1 << TAVOR_MPT_SIZE_SHIFT) 64 65 /* 66 * Minimal configuration value. 67 */ 68 #define TAVOR_NUM_MPT_SHIFT_MIN 0xD 69 70 /* 71 * The following defines specify the size of each individual MTT entry and 72 * the number of MTT entries that make up an MTT segment (TAVOR_MTTSEG_SIZE) 73 */ 74 #define TAVOR_MTT_SIZE_SHIFT 0x3 75 #define TAVOR_MTT_SIZE (1 << TAVOR_MTT_SIZE_SHIFT) 76 #define TAVOR_MTTSEG_SIZE_SHIFT 0x0 77 #define TAVOR_MTTSEG_SIZE (8 << TAVOR_MTTSEG_SIZE_SHIFT) 78 79 /* 80 * These define the total number of MTT segments. By default we are setting 81 * this number of MTT segments (the MTT table size) to 2M segments. This 82 * default value is used to initialize the "tavor_log_num_mttseg" config 83 * variable. 84 * Note: Each segment is currently set to 8 MTT entries (TAVOR_MTTSEG_SIZE). 85 * This means that we can support up to 16M MTT entries (i.e. "pages"). 86 */ 87 #define TAVOR_NUM_MTTSEG_SHIFT 0x15 88 #define TAVOR_NUM_MTTSEG (1 << TAVOR_NUM_MTTSEG_SHIFT) 89 90 /* 91 * Minimal configuration value. 92 */ 93 #define TAVOR_NUM_MTTSEG_SHIFT_MIN 0x11 94 95 /* 96 * Macro to round a number of MTT entries to the number of MTT segments. 97 */ 98 #define TAVOR_NUMMTT_TO_MTTSEG(num) \ 99 (((num) + TAVOR_MTTSEG_SIZE - 1) >> \ 100 (TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT)) 101 102 /* 103 * This define is used to specify the "MTT page walk version" in the Tavor 104 * INIT_HCA command. 105 */ 106 #define TAVOR_MTT_PG_WALK_VER 0 107 108 /* 109 * This define is the maximum size of a memory region or window (log 2). It is 110 * set depending on size of the DDR being either 128MB or 256MB. These defines 111 * are used to initialize the "tavor_log_max_mrw_sz" configuration variable, 112 * and are proportional to the max MPT size set above. 113 */ 114 #define TAVOR_MAX_MEM_MPT_SHIFT_128 0x23 115 #define TAVOR_MAX_MEM_MPT_SHIFT_256 0x24 116 117 /* 118 * Minimal configuration value. 119 */ 120 #define TAVOR_MAX_MEM_MPT_SHIFT_MIN 0x1E 121 122 /* 123 * Defines used by tavor_mr_deregister() to specify how much/to what extent 124 * a given memory regions resources should be freed up. TAVOR_MR_DEREG_ALL 125 * says what it means, free up all the resources associated with the region. 126 * TAVOR_MR_DEREG_NO_HW2SW_MPT indicates that it is unnecessary to attempt 127 * the ownership transfer (from hardware to software) for the given MPT entry. 128 * And TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND indicates that it is not only 129 * unnecessary to attempt the ownership transfer for MPT, but it is also 130 * unnecessary to attempt to unbind the memory. 131 * In general, these last two are specified when tavor_mr_deregister() is 132 * called from tavor_mr_reregister(), where the MPT ownership transfer or 133 * memory unbinding may have already been successfully performed. 134 */ 135 #define TAVOR_MR_DEREG_ALL 3 136 #define TAVOR_MR_DEREG_NO_HW2SW_MPT 2 137 #define TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND 1 138 139 /* 140 * The following define is used by tavor_mr_rereg_xlat_helper() to determine 141 * whether or not a given DMA handle can be reused. If the DMA handle was 142 * previously initialized for IOMMU bypass mapping, then it can not be reused 143 * to reregister a region for DDI_DMA_STREAMING access. 144 */ 145 #define TAVOR_MR_REUSE_DMAHDL(mr, flags) \ 146 (((mr)->mr_bindinfo.bi_bypass != TAVOR_BINDMEM_BYPASS) || \ 147 !((flags) & IBT_MR_NONCOHERENT)) 148 149 /* 150 * The tavor_sw_refcnt_t structure is used internally by the Tavor driver to 151 * track all the information necessary to manage shared memory regions. Since 152 * a shared memory region _will_ have its own distinct MPT entry, but will 153 * _share_ its MTT entries with another region, it is necessary to track the 154 * number of times a given MTT structure is shared. This ensures that it will 155 * not be prematurely freed up and that can be destroyed only when it is 156 * appropriate to do so. 157 * 158 * Each tavor_sw_refcnt_t structure contains a lock and a reference count 159 * variable which are used to track the necessary information. 160 * 161 * The following macros (below) are used to manipulate and query the MTT 162 * reference count parameters. TAVOR_MTT_REFCNT_INIT() is used to initialize 163 * a newly allocated tavor_sw_refcnt_t struct (setting the "swrc_refcnt" to 1). 164 * And the TAVOR_MTT_IS_NOT_SHARED() and TAVOR_MTT_IS_SHARED() macros are 165 * used to query the current status of tavor_sw_refcnt_t struct to determine 166 * if its "swrc_refcnt" is one or not. 167 */ 168 typedef struct tavor_sw_refcnt_s { 169 kmutex_t swrc_lock; 170 uint_t swrc_refcnt; 171 } tavor_sw_refcnt_t; 172 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_refcnt_t::swrc_refcnt)) 173 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_refcnt_t::swrc_lock, 174 tavor_sw_refcnt_t::swrc_refcnt)) 175 #define TAVOR_MTT_REFCNT_INIT(swrc_tmp) ((swrc_tmp)->swrc_refcnt = 1) 176 #define TAVOR_MTT_IS_NOT_SHARED(swrc_tmp) ((swrc_tmp)->swrc_refcnt == 1) 177 #define TAVOR_MTT_IS_SHARED(swrc_tmp) ((swrc_tmp)->swrc_refcnt != 1) 178 179 180 /* 181 * The tavor_bind_info_t structure is used internally by the Tavor driver to 182 * track all the information necessary to perform the DMA mappings necessary 183 * for memory registration. It is specifically passed into both the 184 * tavor_mr_mem_bind() and tavor_mr_mtt_write() functions which perform most 185 * of the necessary operations for Tavor memory registration. 186 * 187 * This structure is used to pass all the information necessary for a call 188 * to either ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle(). Note: 189 * the fields which need to be valid for each type of binding are slightly 190 * different and that it indicated by the value in the "bi_type" field. The 191 * "bi_type" field may be set to either of the following defined values: 192 * TAVOR_BINDHDL_VADDR (to indicate an "addr" bind) or TAVOR_BINDHDL_BUF (to 193 * indicate a "buf" bind). 194 * 195 * Upon return from tavor_mr_mem_bind(), the tavor_bind_info_t struct will 196 * have its "bi_dmahdl", "bi_dmacookie", and "bi_cookiecnt" fields filled in. 197 * It is these values which are of particular interest to the 198 * tavor_mr_mtt_write() routine (they hold the PCI mapped addresses). 199 * 200 * Once initialized and used in this way, the tavor_bind_info_t will not to be 201 * modified in anyway until it is subsequently passed to tavor_mr_mem_unbind() 202 * where the memory and resources will be unbound and reclaimed. Note: the 203 * "bi_free_dmahdl" flag indicated whether the ddi_dma_handle_t should be 204 * freed as part of the tavor_mr_mem_unbind() operation or whether it will 205 * be freed later elsewhere. 206 */ 207 typedef struct tavor_bind_info_s { 208 uint64_t bi_addr; 209 uint64_t bi_len; 210 struct as *bi_as; 211 struct buf *bi_buf; 212 ddi_dma_handle_t bi_dmahdl; 213 ddi_dma_cookie_t bi_dmacookie; 214 uint_t bi_cookiecnt; 215 uint_t bi_type; 216 uint_t bi_flags; 217 uint_t bi_bypass; 218 uint_t bi_free_dmahdl; 219 } tavor_bind_info_t; 220 #define TAVOR_BINDHDL_NONE 0 221 #define TAVOR_BINDHDL_VADDR 1 222 #define TAVOR_BINDHDL_BUF 2 223 #define TAVOR_BINDHDL_UBUF 3 224 225 /* 226 * The tavor_sw_mr_s structure is also referred to using the "tavor_mrhdl_t" 227 * typedef (see tavor_typedef.h). It encodes all the information necessary 228 * to track the various resources needed to register, reregister, deregister, 229 * and perform all the myriad other operations on both memory regions _and_ 230 * memory windows. 231 * 232 * A pointer to this structure is returned from many of the IBTF's CI verbs 233 * interfaces for memory registration. 234 * 235 * It contains pointers to the various resources allocated for a memory 236 * region, i.e. MPT resource, MTT resource, and MTT reference count resource. 237 * In addition it contains the tavor_bind_info_t struct used for the memory 238 * bind operation on a given memory region. 239 * 240 * It also has a pointers to the associated PD handle, placeholders for access 241 * flags, memory keys, and suggested page size for the region. It also has 242 * the necessary backpointer to the resource that corresponds to the structure 243 * itself. And lastly, it contains a placeholder for a callback which should 244 * be called on memory region unpinning. 245 */ 246 struct tavor_sw_mr_s { 247 kmutex_t mr_lock; 248 tavor_rsrc_t *mr_mptrsrcp; 249 tavor_rsrc_t *mr_mttrsrcp; 250 tavor_rsrc_t *mr_mttrefcntp; 251 tavor_pdhdl_t mr_pdhdl; 252 tavor_bind_info_t mr_bindinfo; 253 ibt_mr_attr_flags_t mr_accflag; 254 uint32_t mr_lkey; 255 uint32_t mr_rkey; 256 uint32_t mr_logmttpgsz; 257 tavor_rsrc_t *mr_rsrcp; 258 uint_t mr_is_umem; 259 ddi_umem_cookie_t mr_umemcookie; 260 void (*mr_umem_cbfunc)(void *, void *); 261 void *mr_umem_cbarg1; 262 void *mr_umem_cbarg2; 263 }; 264 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_mr_s::mr_bindinfo 265 tavor_sw_mr_s::mr_lkey 266 tavor_sw_mr_s::mr_is_umem)) 267 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_mr_s::mr_lock, 268 tavor_sw_mr_s::mr_mptrsrcp 269 tavor_sw_mr_s::mr_mttrsrcp 270 tavor_sw_mr_s::mr_mttrefcntp 271 tavor_sw_mr_s::mr_bindinfo 272 tavor_sw_mr_s::mr_lkey 273 tavor_sw_mr_s::mr_rkey 274 tavor_sw_mr_s::mr_logmttpgsz 275 tavor_sw_mr_s::mr_rsrcp 276 tavor_sw_mr_s::mr_is_umem 277 tavor_sw_mr_s::mr_umemcookie 278 tavor_sw_mr_s::mr_umem_cbfunc 279 tavor_sw_mr_s::mr_umem_cbarg1 280 tavor_sw_mr_s::mr_umem_cbarg2)) 281 282 /* 283 * The tavor_mr_options_t structure is used in several of the Tavor memory 284 * registration routines to provide additional option functionality. When 285 * a NULL pointer is passed in place of a pointer to this struct, it is a 286 * way of specifying the "default" behavior. Using this structure, however, 287 * is a way of controlling any extended behavior. 288 * 289 * Currently, the only defined "extended" behaviors are for specifying whether 290 * a given memory region should bypass the PCI IOMMU (TAVOR_BINDMEM_BYPASS) 291 * or be mapped into the IOMMU (TAVOR_BINDMEM_NORMAL), for specifying whether 292 * a given ddi_dma_handle_t should be used in the bind operation, and for 293 * specifying whether a memory registration should attempt to return an IB 294 * vaddr which is "zero-based" (aids in alignment contraints for QPs). 295 * 296 * This defaults today to always bypassing the IOMMU (can be changed by using 297 * the "tavor_iommu_bypass" configuration variable), to always allocating 298 * a new dma handle, and to using the virtual address passed in (i.e. not 299 * "zero-based"). 300 */ 301 typedef struct tavor_mr_options_s { 302 ddi_dma_handle_t mro_bind_dmahdl; 303 uint_t mro_bind_type; 304 uint_t mro_bind_override_addr; 305 } tavor_mr_options_t; 306 #define TAVOR_BINDMEM_NORMAL 1 307 #define TAVOR_BINDMEM_BYPASS 0 308 309 int tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl, 310 ibt_dmr_attr_t *attr_p, tavor_mrhdl_t *mrhdl); 311 int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl, 312 ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op); 313 int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl, 314 ibt_smr_attr_t *attrp, struct buf *buf, tavor_mrhdl_t *mrhdl, 315 tavor_mr_options_t *op); 316 int tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 317 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsz_bits); 318 int tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind, 319 tavor_rsrc_t *mtt); 320 int tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl, 321 tavor_pdhdl_t pdhdl, ibt_smr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new); 322 int tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, 323 uint_t level, uint_t sleep); 324 int tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mrhdl, 325 ibt_mr_query_attr_t *attr); 326 int tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mrhdl, 327 tavor_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new, 328 tavor_mr_options_t *op); 329 int tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr, 330 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf, 331 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op); 332 int tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, 333 size_t num_segs); 334 int tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pdhdl, 335 ibt_mw_flags_t flags, tavor_mwhdl_t *mwhdl); 336 int tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep); 337 void tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key); 338 339 #ifdef __cplusplus 340 } 341 #endif 342 343 #endif /* _SYS_IB_ADAPTERS_TAVOR_MR_H */ 344