1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_IB_ADAPTERS_TAVOR_MR_H 28 #define _SYS_IB_ADAPTERS_TAVOR_MR_H 29 30 /* 31 * tavor_mr.h 32 * Contains all of the prototypes, #defines, and structures necessary 33 * for the Tavor Memory Region/Window routines. 34 * Specifically it contains #defines, macros, and prototypes for each of 35 * the required memory region/window verbs that can be accessed through 36 * the IBTF's CI interfaces. In particular each of the prototypes defined 37 * below is called from a corresponding CI interface routine (as specified 38 * in the tavor_ci.c file). 39 */ 40 41 #include <sys/types.h> 42 #include <sys/conf.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 46 #ifdef __cplusplus 47 extern "C" { 48 #endif 49 50 /* 51 * The following defines specify the default number of MPT entries and their 52 * individual entry size. Settings exist for the supported DDR DIMM sizes of 53 * 128MB and 256MB. If a DIMM greater than 256 is found, then the 256MB 54 * profile is used. 
See tavor_cfg.c for more discussion on config profiles. 55 * 56 * For manual configuration (not using config profiles), this value is 57 * controllable through the "tavor_log_num_mpt" configuration variable. To 58 * override config profile settings the 'tavor_alt_config_enable' configuration 59 * variable must first be set. 60 */ 61 #define TAVOR_NUM_MPT_SHIFT_128 0x14 62 #define TAVOR_NUM_MPT_SHIFT_256 0x15 63 #define TAVOR_MPT_SIZE_SHIFT 0x6 64 #define TAVOR_MPT_SIZE (1 << TAVOR_MPT_SIZE_SHIFT) 65 66 /* 67 * Minimal configuration value. 68 */ 69 #define TAVOR_NUM_MPT_SHIFT_MIN 0xD 70 71 /* 72 * The following defines specify the size of each individual MTT entry and 73 * the number of MTT entries that make up an MTT segment (TAVOR_MTTSEG_SIZE) 74 */ 75 #define TAVOR_MTT_SIZE_SHIFT 0x3 76 #define TAVOR_MTT_SIZE (1 << TAVOR_MTT_SIZE_SHIFT) 77 #define TAVOR_MTTSEG_SIZE_SHIFT 0x0 78 #define TAVOR_MTTSEG_SIZE (8 << TAVOR_MTTSEG_SIZE_SHIFT) 79 80 /* 81 * These define the total number of MTT segments. By default we are setting 82 * this number of MTT segments (the MTT table size) to 2M segments. This 83 * default value is used to initialize the "tavor_log_num_mttseg" config 84 * variable. 85 * Note: Each segment is currently set to 8 MTT entries (TAVOR_MTTSEG_SIZE). 86 * This means that we can support up to 16M MTT entries (i.e. "pages"). 87 */ 88 #define TAVOR_NUM_MTTSEG_SHIFT 0x15 89 #define TAVOR_NUM_MTTSEG (1 << TAVOR_NUM_MTTSEG_SHIFT) 90 91 /* 92 * Minimal configuration value. 93 */ 94 #define TAVOR_NUM_MTTSEG_SHIFT_MIN 0x11 95 96 /* 97 * Macro to round a number of MTT entries to the number of MTT segments. 98 */ 99 #define TAVOR_NUMMTT_TO_MTTSEG(num) \ 100 (((num) + TAVOR_MTTSEG_SIZE - 1) >> \ 101 (TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT)) 102 103 /* 104 * This define is used to specify the "MTT page walk version" in the Tavor 105 * INIT_HCA command. 
106 */ 107 #define TAVOR_MTT_PG_WALK_VER 0 108 109 /* 110 * This define is the maximum size of a memory region or window (log 2). It is 111 * set depending on size of the DDR being either 128MB or 256MB. These defines 112 * are used to initialize the "tavor_log_max_mrw_sz" configuration variable, 113 * and are proportional to the max MPT size set above. 114 */ 115 #define TAVOR_MAX_MEM_MPT_SHIFT_128 0x23 116 #define TAVOR_MAX_MEM_MPT_SHIFT_256 0x24 117 118 /* 119 * Minimal configuration value. 120 */ 121 #define TAVOR_MAX_MEM_MPT_SHIFT_MIN 0x1E 122 123 /* 124 * Defines used by tavor_mr_deregister() to specify how much/to what extent 125 * a given memory regions resources should be freed up. TAVOR_MR_DEREG_ALL 126 * says what it means, free up all the resources associated with the region. 127 * TAVOR_MR_DEREG_NO_HW2SW_MPT indicates that it is unnecessary to attempt 128 * the ownership transfer (from hardware to software) for the given MPT entry. 129 * And TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND indicates that it is not only 130 * unnecessary to attempt the ownership transfer for MPT, but it is also 131 * unnecessary to attempt to unbind the memory. 132 * In general, these last two are specified when tavor_mr_deregister() is 133 * called from tavor_mr_reregister(), where the MPT ownership transfer or 134 * memory unbinding may have already been successfully performed. 135 */ 136 #define TAVOR_MR_DEREG_ALL 3 137 #define TAVOR_MR_DEREG_NO_HW2SW_MPT 2 138 #define TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND 1 139 140 /* 141 * The following define is used by tavor_mr_rereg_xlat_helper() to determine 142 * whether or not a given DMA handle can be reused. If the DMA handle was 143 * previously initialized for IOMMU bypass mapping, then it can not be reused 144 * to reregister a region for DDI_DMA_STREAMING access. 
145 */ 146 #define TAVOR_MR_REUSE_DMAHDL(mr, flags) \ 147 (((mr)->mr_bindinfo.bi_bypass != TAVOR_BINDMEM_BYPASS) || \ 148 !((flags) & IBT_MR_NONCOHERENT)) 149 150 /* 151 * The tavor_sw_refcnt_t structure is used internally by the Tavor driver to 152 * track all the information necessary to manage shared memory regions. Since 153 * a shared memory region _will_ have its own distinct MPT entry, but will 154 * _share_ its MTT entries with another region, it is necessary to track the 155 * number of times a given MTT structure is shared. This ensures that it will 156 * not be prematurely freed up and that can be destroyed only when it is 157 * appropriate to do so. 158 * 159 * Each tavor_sw_refcnt_t structure contains a lock and a reference count 160 * variable which are used to track the necessary information. 161 * 162 * The following macros (below) are used to manipulate and query the MTT 163 * reference count parameters. TAVOR_MTT_REFCNT_INIT() is used to initialize 164 * a newly allocated tavor_sw_refcnt_t struct (setting the "swrc_refcnt" to 1). 165 * And the TAVOR_MTT_IS_NOT_SHARED() and TAVOR_MTT_IS_SHARED() macros are 166 * used to query the current status of tavor_sw_refcnt_t struct to determine 167 * if its "swrc_refcnt" is one or not. 168 */ 169 typedef struct tavor_sw_refcnt_s { 170 kmutex_t swrc_lock; 171 uint_t swrc_refcnt; 172 } tavor_sw_refcnt_t; 173 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_refcnt_t::swrc_refcnt)) 174 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_refcnt_t::swrc_lock, 175 tavor_sw_refcnt_t::swrc_refcnt)) 176 #define TAVOR_MTT_REFCNT_INIT(swrc_tmp) ((swrc_tmp)->swrc_refcnt = 1) 177 #define TAVOR_MTT_IS_NOT_SHARED(swrc_tmp) ((swrc_tmp)->swrc_refcnt == 1) 178 #define TAVOR_MTT_IS_SHARED(swrc_tmp) ((swrc_tmp)->swrc_refcnt != 1) 179 180 181 /* 182 * The tavor_bind_info_t structure is used internally by the Tavor driver to 183 * track all the information necessary to perform the DMA mappings necessary 184 * for memory registration. 
 * It is specifically passed into both the
 * tavor_mr_mem_bind() and tavor_mr_mtt_write() functions which perform most
 * of the necessary operations for Tavor memory registration.
 *
 * This structure is used to pass all the information necessary for a call
 * to either ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle().  Note:
 * the fields which need to be valid for each type of binding are slightly
 * different, and that is indicated by the value in the "bi_type" field.  The
 * "bi_type" field may be set to either of the following defined values:
 * TAVOR_BINDHDL_VADDR (to indicate an "addr" bind) or TAVOR_BINDHDL_BUF (to
 * indicate a "buf" bind).
 *
 * Upon return from tavor_mr_mem_bind(), the tavor_bind_info_t struct will
 * have its "bi_dmahdl", "bi_dmacookie", and "bi_cookiecnt" fields filled in.
 * It is these values which are of particular interest to the
 * tavor_mr_mtt_write() routine (they hold the PCI mapped addresses).
 *
 * Once initialized and used in this way, the tavor_bind_info_t will not be
 * modified in any way until it is subsequently passed to
 * tavor_mr_mem_unbind(), where the memory and resources will be unbound and
 * reclaimed.  Note: the "bi_free_dmahdl" flag indicates whether the
 * ddi_dma_handle_t should be freed as part of the tavor_mr_mem_unbind()
 * operation or whether it will be freed later elsewhere.
207 */ 208 typedef struct tavor_bind_info_s { 209 uint64_t bi_addr; 210 uint64_t bi_len; 211 struct as *bi_as; 212 struct buf *bi_buf; 213 ddi_dma_handle_t bi_dmahdl; 214 ddi_dma_cookie_t bi_dmacookie; 215 uint_t bi_cookiecnt; 216 uint_t bi_type; 217 uint_t bi_flags; 218 uint_t bi_bypass; 219 uint_t bi_free_dmahdl; 220 } tavor_bind_info_t; 221 #define TAVOR_BINDHDL_NONE 0 222 #define TAVOR_BINDHDL_VADDR 1 223 #define TAVOR_BINDHDL_BUF 2 224 #define TAVOR_BINDHDL_UBUF 3 225 226 /* 227 * The tavor_sw_mr_s structure is also referred to using the "tavor_mrhdl_t" 228 * typedef (see tavor_typedef.h). It encodes all the information necessary 229 * to track the various resources needed to register, reregister, deregister, 230 * and perform all the myriad other operations on both memory regions _and_ 231 * memory windows. 232 * 233 * A pointer to this structure is returned from many of the IBTF's CI verbs 234 * interfaces for memory registration. 235 * 236 * It contains pointers to the various resources allocated for a memory 237 * region, i.e. MPT resource, MTT resource, and MTT reference count resource. 238 * In addition it contains the tavor_bind_info_t struct used for the memory 239 * bind operation on a given memory region. 240 * 241 * It also has a pointers to the associated PD handle, placeholders for access 242 * flags, memory keys, and suggested page size for the region. It also has 243 * the necessary backpointer to the resource that corresponds to the structure 244 * itself. And lastly, it contains a placeholder for a callback which should 245 * be called on memory region unpinning. 
246 */ 247 struct tavor_sw_mr_s { 248 kmutex_t mr_lock; 249 tavor_rsrc_t *mr_mptrsrcp; 250 tavor_rsrc_t *mr_mttrsrcp; 251 tavor_rsrc_t *mr_mttrefcntp; 252 tavor_pdhdl_t mr_pdhdl; 253 tavor_bind_info_t mr_bindinfo; 254 ibt_mr_attr_flags_t mr_accflag; 255 uint32_t mr_lkey; 256 uint32_t mr_rkey; 257 uint32_t mr_logmttpgsz; 258 tavor_rsrc_t *mr_rsrcp; 259 uint_t mr_is_fmr; 260 tavor_fmr_list_t *mr_fmr; 261 uint_t mr_is_umem; 262 ddi_umem_cookie_t mr_umemcookie; 263 void (*mr_umem_cbfunc)(void *, void *); 264 void *mr_umem_cbarg1; 265 void *mr_umem_cbarg2; 266 }; 267 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_mr_s::mr_bindinfo 268 tavor_sw_mr_s::mr_lkey 269 tavor_sw_mr_s::mr_is_umem 270 tavor_sw_mr_s::mr_is_fmr 271 tavor_sw_mr_s::mr_fmr)) 272 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_mr_s::mr_lock, 273 tavor_sw_mr_s::mr_mptrsrcp 274 tavor_sw_mr_s::mr_mttrsrcp 275 tavor_sw_mr_s::mr_mttrefcntp 276 tavor_sw_mr_s::mr_bindinfo 277 tavor_sw_mr_s::mr_lkey 278 tavor_sw_mr_s::mr_rkey 279 tavor_sw_mr_s::mr_logmttpgsz 280 tavor_sw_mr_s::mr_rsrcp 281 tavor_sw_mr_s::mr_is_umem 282 tavor_sw_mr_s::mr_umemcookie 283 tavor_sw_mr_s::mr_umem_cbfunc 284 tavor_sw_mr_s::mr_umem_cbarg1 285 tavor_sw_mr_s::mr_umem_cbarg2)) 286 287 /* 288 * The tavor_mr_options_t structure is used in several of the Tavor memory 289 * registration routines to provide additional option functionality. When 290 * a NULL pointer is passed in place of a pointer to this struct, it is a 291 * way of specifying the "default" behavior. Using this structure, however, 292 * is a way of controlling any extended behavior. 
293 * 294 * Currently, the only defined "extended" behaviors are for specifying whether 295 * a given memory region should bypass the PCI IOMMU (TAVOR_BINDMEM_BYPASS) 296 * or be mapped into the IOMMU (TAVOR_BINDMEM_NORMAL), for specifying whether 297 * a given ddi_dma_handle_t should be used in the bind operation, and for 298 * specifying whether a memory registration should attempt to return an IB 299 * vaddr which is "zero-based" (aids in alignment contraints for QPs). 300 * 301 * This defaults today to always bypassing the IOMMU (can be changed by using 302 * the "tavor_iommu_bypass" configuration variable), to always allocating 303 * a new dma handle, and to using the virtual address passed in (i.e. not 304 * "zero-based"). 305 */ 306 typedef struct tavor_mr_options_s { 307 ddi_dma_handle_t mro_bind_dmahdl; 308 uint_t mro_bind_type; 309 uint_t mro_bind_override_addr; 310 } tavor_mr_options_t; 311 #define TAVOR_BINDMEM_NORMAL 1 312 #define TAVOR_BINDMEM_BYPASS 0 313 314 int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl, 315 ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op); 316 int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl, 317 ibt_smr_attr_t *attrp, struct buf *buf, tavor_mrhdl_t *mrhdl, 318 tavor_mr_options_t *op); 319 int tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 320 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsz_bits); 321 int tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind, 322 tavor_rsrc_t *mtt); 323 int tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl, 324 tavor_pdhdl_t pdhdl, ibt_smr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new); 325 int tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, 326 uint_t level, uint_t sleep); 327 int tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mrhdl, 328 ibt_mr_query_attr_t *attr); 329 int tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mrhdl, 330 tavor_pdhdl_t pdhdl, 
ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new, 331 tavor_mr_options_t *op); 332 int tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr, 333 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf, 334 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op); 335 int tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, 336 size_t num_segs); 337 int tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pdhdl, 338 ibt_mw_flags_t flags, tavor_mwhdl_t *mwhdl); 339 int tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep); 340 void tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key); 341 int tavor_mr_alloc_fmr(tavor_state_t *state, tavor_pdhdl_t pd, 342 tavor_fmrhdl_t fmr_pool, tavor_mrhdl_t *mrhdl); 343 int tavor_mr_dealloc_fmr(tavor_state_t *state, tavor_mrhdl_t *mrhdl); 344 int tavor_mr_register_physical_fmr(tavor_state_t *state, 345 ibt_pmr_attr_t *mem_pattr_p, tavor_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p); 346 int tavor_mr_invalidate_fmr(tavor_state_t *state, tavor_mrhdl_t mr); 347 int tavor_mr_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr); 348 349 350 #ifdef __cplusplus 351 } 352 #endif 353 354 #endif /* _SYS_IB_ADAPTERS_TAVOR_MR_H */ 355