1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_mr.c
28 * Hermon Memory Region/Window Routines
29 *
30 * Implements all the routines necessary to provide the requisite memory
31 * registration verbs. These include operations like RegisterMemRegion(),
32 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
33 * etc., that affect Memory Regions. It also includes the verbs that
34 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
35 * and QueryMemWindow().
36 */
37
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/esunddi.h>
44
45 #include <sys/ib/adapters/hermon/hermon.h>
46
47 extern uint32_t hermon_kernel_data_ro;
48 extern uint32_t hermon_user_data_ro;
49 extern int hermon_rdma_debug;
50
51 /*
52 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
53 * of Hermon memory keys (LKeys and RKeys)
54 */
55 static uint_t hermon_memkey_cnt = 0x00;
56 #define HERMON_MEMKEY_SHIFT 24
57
58 /* initial state of an MPT */
59 #define HERMON_MPT_SW_OWNERSHIP 0xF /* memory regions */
60 #define HERMON_MPT_FREE 0x3 /* allocate lkey */
61
62 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
63 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
64 hermon_mpt_rsrc_type_t mpt_type);
65 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
66 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
67 hermon_mr_options_t *op);
68 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
69 hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
70 uint_t sleep, uint_t *dereg_level);
71 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
72 hermon_bind_info_t *bind, uint_t *mtt_pgsize);
73 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
74 ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
75 static void hermon_mr_mem_unbind(hermon_state_t *state,
76 hermon_bind_info_t *bind);
77 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
78 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
79 static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
80 hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
81 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
82 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
83
84
85 /*
86 * The Hermon umem_lockmemory() callback ops. When userland memory is
87 * registered, these callback ops are specified. The hermon_umap_umemlock_cb()
88 * callback will be called whenever the memory for the corresponding
89 * ddi_umem_cookie_t is being freed.
90 */
91 static struct umem_callback_ops hermon_umem_cbops = {
92 UMEM_CALLBACK_VERSION,
93 hermon_umap_umemlock_cb,
94 };
95
96
97
98 /*
99 * hermon_mr_register()
100 * Context: Can be called from interrupt or base context.
101 */
102 int
hermon_mr_register(hermon_state_t * state,hermon_pdhdl_t pd,ibt_mr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)103 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
104 ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
105 hermon_mpt_rsrc_type_t mpt_type)
106 {
107 hermon_bind_info_t bind;
108 int status;
109
110 /*
111 * Fill in the "bind" struct. This struct provides the majority
112 * of the information that will be used to distinguish between an
113 * "addr" binding (as is the case here) and a "buf" binding (see
114 * below). The "bind" struct is later passed to hermon_mr_mem_bind()
115 * which does most of the "heavy lifting" for the Hermon memory
116 * registration routines.
117 */
118 bind.bi_type = HERMON_BINDHDL_VADDR;
119 bind.bi_addr = mr_attr->mr_vaddr;
120 bind.bi_len = mr_attr->mr_len;
121 bind.bi_as = mr_attr->mr_as;
122 bind.bi_flags = mr_attr->mr_flags;
123 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
124 mpt_type);
125 return (status);
126 }
127
128
129 /*
130 * hermon_mr_register_buf()
131 * Context: Can be called from interrupt or base context.
132 */
133 int
hermon_mr_register_buf(hermon_state_t * state,hermon_pdhdl_t pd,ibt_smr_attr_t * mr_attr,struct buf * buf,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)134 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
135 ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
136 hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
137 {
138 hermon_bind_info_t bind;
139 int status;
140
141 /*
142 * Fill in the "bind" struct. This struct provides the majority
143 * of the information that will be used to distinguish between an
144 * "addr" binding (see above) and a "buf" binding (as is the case
145 * here). The "bind" struct is later passed to hermon_mr_mem_bind()
146 * which does most of the "heavy lifting" for the Hermon memory
147 * registration routines. Note: We have chosen to provide
148 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
149 * not set). It is not critical what value we choose here as it need
150 * only be unique for the given RKey (which will happen by default),
151 * so the choice here is somewhat arbitrary.
152 */
153 bind.bi_type = HERMON_BINDHDL_BUF;
154 bind.bi_buf = buf;
155 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
156 bind.bi_addr = mr_attr->mr_vaddr;
157 } else {
158 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
159 }
160 bind.bi_as = NULL;
161 bind.bi_len = (uint64_t)buf->b_bcount;
162 bind.bi_flags = mr_attr->mr_flags;
163 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
164 return (status);
165 }
166
167
168 /*
169 * hermon_mr_register_shared()
170 * Context: Can be called from interrupt or base context.
171 */
172 int
hermon_mr_register_shared(hermon_state_t * state,hermon_mrhdl_t mrhdl,hermon_pdhdl_t pd,ibt_smr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl_new)173 hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
174 hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
175 {
176 hermon_rsrc_t *mpt, *mtt, *rsrc;
177 hermon_umap_db_entry_t *umapdb;
178 hermon_hw_dmpt_t mpt_entry;
179 hermon_mrhdl_t mr;
180 hermon_bind_info_t *bind;
181 ddi_umem_cookie_t umem_cookie;
182 size_t umem_len;
183 caddr_t umem_addr;
184 uint64_t mtt_addr, pgsize_msk;
185 uint_t sleep, mr_is_umem;
186 int status, umem_flags;
187
188 /*
189 * Check the sleep flag. Ensure that it is consistent with the
190 * current thread context (i.e. if we are currently in the interrupt
191 * context, then we shouldn't be attempting to sleep).
192 */
193 sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
194 HERMON_SLEEP;
195 if ((sleep == HERMON_SLEEP) &&
196 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
197 status = IBT_INVALID_PARAM;
198 goto mrshared_fail;
199 }
200
201 /* Increment the reference count on the protection domain (PD) */
202 hermon_pd_refcnt_inc(pd);
203
204 /*
205 * Allocate an MPT entry. This will be filled in with all the
206 * necessary parameters to define the shared memory region.
207 * Specifically, it will be made to reference the currently existing
208 * MTT entries and ownership of the MPT will be passed to the hardware
209 * in the last step below. If we fail here, we must undo the
210 * protection domain reference count.
211 */
212 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
213 if (status != DDI_SUCCESS) {
214 status = IBT_INSUFF_RESOURCE;
215 goto mrshared_fail1;
216 }
217
218 /*
219 * Allocate the software structure for tracking the shared memory
220 * region (i.e. the Hermon Memory Region handle). If we fail here, we
221 * must undo the protection domain reference count and the previous
222 * resource allocation.
223 */
224 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
225 if (status != DDI_SUCCESS) {
226 status = IBT_INSUFF_RESOURCE;
227 goto mrshared_fail2;
228 }
229 mr = (hermon_mrhdl_t)rsrc->hr_addr;
230 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
231
232 /*
233 * Setup and validate the memory region access flags. This means
234 * translating the IBTF's enable flags into the access flags that
235 * will be used in later operations.
236 */
237 mr->mr_accflag = 0;
238 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
239 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
240 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
241 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
242 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
243 mr->mr_accflag |= IBT_MR_REMOTE_READ;
244 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
245 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
246 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
247 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
248
249 /*
250 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
251 * from a certain number of "constrained" bits (the least significant
252 * bits) and some number of "unconstrained" bits. The constrained
253 * bits must be set to the index of the entry in the MPT table, but
254 * the unconstrained bits can be set to any value we wish. Note:
255 * if no remote access is required, then the RKey value is not filled
256 * in. Otherwise both Rkey and LKey are given the same value.
257 */
258 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
259
260 /* Grab the MR lock for the current memory region */
261 mutex_enter(&mrhdl->mr_lock);
262
263 /*
264 * Check here to see if the memory region has already been partially
265 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
266 * If so, this is an error, return failure.
267 */
268 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
269 mutex_exit(&mrhdl->mr_lock);
270 status = IBT_MR_HDL_INVALID;
271 goto mrshared_fail3;
272 }
273
274 /*
275 * Determine if the original memory was from userland and, if so, pin
276 * the pages (again) with umem_lockmemory(). This will guarantee a
277 * separate callback for each of this shared region's MR handles.
278 * If this is userland memory, then allocate an entry in the
279 * "userland resources database". This will later be added to
280 * the database (after all further memory registration operations are
281 * successful). If we fail here, we must undo all the above setup.
282 */
283 mr_is_umem = mrhdl->mr_is_umem;
284 if (mr_is_umem) {
285 umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
286 umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
287 ~PAGEOFFSET);
288 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
289 DDI_UMEMLOCK_LONGTERM);
290 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
291 &umem_cookie, &hermon_umem_cbops, NULL);
292 if (status != 0) {
293 mutex_exit(&mrhdl->mr_lock);
294 status = IBT_INSUFF_RESOURCE;
295 goto mrshared_fail3;
296 }
297
298 umapdb = hermon_umap_db_alloc(state->hs_instance,
299 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
300 (uint64_t)(uintptr_t)rsrc);
301 if (umapdb == NULL) {
302 mutex_exit(&mrhdl->mr_lock);
303 status = IBT_INSUFF_RESOURCE;
304 goto mrshared_fail4;
305 }
306 }
307
308 /*
309 * Copy the MTT resource pointer (and additional parameters) from
310 * the original Hermon Memory Region handle. Note: this is normally
311 * where the hermon_mr_mem_bind() routine would be called, but because
312 * we already have bound and filled-in MTT entries it is simply a
313 * matter here of managing the MTT reference count and grabbing the
314 * address of the MTT table entries (for filling in the shared region's
315 * MPT entry).
316 */
317 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
318 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
319 mr->mr_bindinfo = mrhdl->mr_bindinfo;
320 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
321 mutex_exit(&mrhdl->mr_lock);
322 bind = &mr->mr_bindinfo;
323 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
324 mtt = mr->mr_mttrsrcp;
325
326 /*
327 * Increment the MTT reference count (to reflect the fact that
328 * the MTT is now shared)
329 */
330 (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);
331
332 /*
333 * Update the new "bind" virtual address. Do some extra work here
334 * to ensure proper alignment. That is, make sure that the page
335 * offset for the beginning of the old range is the same as the
336 * offset for this new mapping
337 */
338 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
339 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
340 (mr->mr_bindinfo.bi_addr & pgsize_msk));
341
342 /*
343 * Fill in the MPT entry. This is the final step before passing
344 * ownership of the MPT entry to the Hermon hardware. We use all of
345 * the information collected/calculated above to fill in the
346 * requisite portions of the MPT.
347 */
348 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
349 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
350 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
351 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
352 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
353 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
354 mpt_entry.lr = 1;
355 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
356 mpt_entry.entity_sz = mr->mr_logmttpgsz;
357 mpt_entry.mem_key = mr->mr_lkey;
358 mpt_entry.pd = pd->pd_pdnum;
359 mpt_entry.start_addr = bind->bi_addr;
360 mpt_entry.reg_win_len = bind->bi_len;
361 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
362 mpt_entry.mtt_addr_h = mtt_addr >> 32;
363 mpt_entry.mtt_addr_l = mtt_addr >> 3;
364
365 /*
366 * Write the MPT entry to hardware. Lastly, we pass ownership of
367 * the entry to the hardware. Note: in general, this operation
368 * shouldn't fail. But if it does, we have to undo everything we've
369 * done above before returning error.
370 */
371 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
372 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
373 if (status != HERMON_CMD_SUCCESS) {
374 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
375 status);
376 if (status == HERMON_CMD_INVALID_STATUS) {
377 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
378 }
379 status = ibc_get_ci_failure(0);
380 goto mrshared_fail5;
381 }
382
383 /*
384 * Fill in the rest of the Hermon Memory Region handle. Having
385 * successfully transferred ownership of the MPT, we can update the
386 * following fields for use in further operations on the MR.
387 */
388 mr->mr_mptrsrcp = mpt;
389 mr->mr_mttrsrcp = mtt;
390 mr->mr_mpt_type = HERMON_MPT_DMPT;
391 mr->mr_pdhdl = pd;
392 mr->mr_rsrcp = rsrc;
393 mr->mr_is_umem = mr_is_umem;
394 mr->mr_is_fmr = 0;
395 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
396 mr->mr_umem_cbfunc = NULL;
397 mr->mr_umem_cbarg1 = NULL;
398 mr->mr_umem_cbarg2 = NULL;
399 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
400 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
401
402 /*
403 * If this is userland memory, then we need to insert the previously
404 * allocated entry into the "userland resources database". This will
405 * allow for later coordination between the hermon_umap_umemlock_cb()
406 * callback and hermon_mr_deregister().
407 */
408 if (mr_is_umem) {
409 hermon_umap_db_add(umapdb);
410 }
411
412 *mrhdl_new = mr;
413
414 return (DDI_SUCCESS);
415
416 /*
417 * The following is cleanup for all possible failure cases in this routine
418 */
419 mrshared_fail5:
420 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
421 if (mr_is_umem) {
422 hermon_umap_db_free(umapdb);
423 }
424 mrshared_fail4:
425 if (mr_is_umem) {
426 ddi_umem_unlock(umem_cookie);
427 }
428 mrshared_fail3:
429 hermon_rsrc_free(state, &rsrc);
430 mrshared_fail2:
431 hermon_rsrc_free(state, &mpt);
432 mrshared_fail1:
433 hermon_pd_refcnt_dec(pd);
434 mrshared_fail:
435 return (status);
436 }
437
438 /*
439 * hermon_mr_alloc_fmr()
440 * Context: Can be called from interrupt or base context.
441 */
442 int
hermon_mr_alloc_fmr(hermon_state_t * state,hermon_pdhdl_t pd,hermon_fmrhdl_t fmr_pool,hermon_mrhdl_t * mrhdl)443 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
444 hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
445 {
446 hermon_rsrc_t *mpt, *mtt, *rsrc;
447 hermon_hw_dmpt_t mpt_entry;
448 hermon_mrhdl_t mr;
449 hermon_bind_info_t bind;
450 uint64_t mtt_addr;
451 uint64_t nummtt;
452 uint_t sleep, mtt_pgsize_bits;
453 int status;
454 offset_t i;
455 hermon_icm_table_t *icm_table;
456 hermon_dma_info_t *dma_info;
457 uint32_t index1, index2, rindx;
458
459 /*
460 * Check the sleep flag. Ensure that it is consistent with the
461 * current thread context (i.e. if we are currently in the interrupt
462 * context, then we shouldn't be attempting to sleep).
463 */
464 sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
465 HERMON_NOSLEEP;
466 if ((sleep == HERMON_SLEEP) &&
467 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
468 return (IBT_INVALID_PARAM);
469 }
470
471 /* Increment the reference count on the protection domain (PD) */
472 hermon_pd_refcnt_inc(pd);
473
474 /*
475 * Allocate an MPT entry. This will be filled in with all the
476 * necessary parameters to define the FMR. Specifically, it will be
477 * made to reference the currently existing MTT entries and ownership
478 * of the MPT will be passed to the hardware in the last step below.
479 * If we fail here, we must undo the protection domain reference count.
480 */
481
482 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
483 if (status != DDI_SUCCESS) {
484 status = IBT_INSUFF_RESOURCE;
485 goto fmralloc_fail1;
486 }
487
488 /*
489 * Allocate the software structure for tracking the fmr memory
490 * region (i.e. the Hermon Memory Region handle). If we fail here, we
491 * must undo the protection domain reference count and the previous
492 * resource allocation.
493 */
494 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
495 if (status != DDI_SUCCESS) {
496 status = IBT_INSUFF_RESOURCE;
497 goto fmralloc_fail2;
498 }
499 mr = (hermon_mrhdl_t)rsrc->hr_addr;
500 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
501
502 /*
503 * Setup and validate the memory region access flags. This means
504 * translating the IBTF's enable flags into the access flags that
505 * will be used in later operations.
506 */
507 mr->mr_accflag = 0;
508 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
509 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
510 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
511 mr->mr_accflag |= IBT_MR_REMOTE_READ;
512 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
513 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
514 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
515 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
516
517 /*
518 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
519 * from a certain number of "constrained" bits (the least significant
520 * bits) and some number of "unconstrained" bits. The constrained
521 * bits must be set to the index of the entry in the MPT table, but
522 * the unconstrained bits can be set to any value we wish. Note:
523 * if no remote access is required, then the RKey value is not filled
524 * in. Otherwise both Rkey and LKey are given the same value.
525 */
526 mr->mr_fmr_key = 1; /* ready for the next reload */
527 mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;
528
529 /*
530 * Determine number of pages spanned. This routine uses the
531 * information in the "bind" struct to determine the required
532 * number of MTT entries needed (and returns the suggested page size -
533 * as a "power-of-2" - for each MTT entry).
534 */
535 /* Assume address will be page aligned later */
536 bind.bi_addr = 0;
537 /* Calculate size based on given max pages */
538 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
539 nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
540
541 /*
542 * Allocate the MTT entries. Use the calculations performed above to
543 * allocate the required number of MTT entries. If we fail here, we
544 * must not only undo all the previous resource allocation (and PD
545 * reference count), but we must also unbind the memory.
546 */
547 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
548 if (status != DDI_SUCCESS) {
549 IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
550 status = IBT_INSUFF_RESOURCE;
551 goto fmralloc_fail3;
552 }
553 mr->mr_logmttpgsz = mtt_pgsize_bits;
554
555 /*
556 * Fill in the MPT entry. This is the final step before passing
557 * ownership of the MPT entry to the Hermon hardware. We use all of
558 * the information collected/calculated above to fill in the
559 * requisite portions of the MPT.
560 */
561 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
562 mpt_entry.en_bind = 0;
563 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
564 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
565 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
566 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
567 mpt_entry.lr = 1;
568 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
569 mpt_entry.pd = pd->pd_pdnum;
570
571 mpt_entry.entity_sz = mr->mr_logmttpgsz;
572 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
573 mpt_entry.fast_reg_en = 1;
574 mpt_entry.mtt_size = (uint_t)nummtt;
575 mpt_entry.mtt_addr_h = mtt_addr >> 32;
576 mpt_entry.mtt_addr_l = mtt_addr >> 3;
577 mpt_entry.mem_key = mr->mr_lkey;
578
579 /*
580 * FMR sets these to 0 for now. Later during actual fmr registration
581 * these values are filled in.
582 */
583 mpt_entry.start_addr = 0;
584 mpt_entry.reg_win_len = 0;
585
586 /*
587 * Write the MPT entry to hardware. Lastly, we pass ownership of
588 * the entry to the hardware. Note: in general, this operation
589 * shouldn't fail. But if it does, we have to undo everything we've
590 * done above before returning error.
591 */
592 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
593 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
594 if (status != HERMON_CMD_SUCCESS) {
595 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
596 status);
597 if (status == HERMON_CMD_INVALID_STATUS) {
598 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
599 }
600 status = ibc_get_ci_failure(0);
601 goto fmralloc_fail4;
602 }
603
604 /*
605 * Fill in the rest of the Hermon Memory Region handle. Having
606 * successfully transferred ownership of the MPT, we can update the
607 * following fields for use in further operations on the MR. Also, set
608 * that this is an FMR region.
609 */
610 mr->mr_mptrsrcp = mpt;
611 mr->mr_mttrsrcp = mtt;
612
613 mr->mr_mpt_type = HERMON_MPT_DMPT;
614 mr->mr_pdhdl = pd;
615 mr->mr_rsrcp = rsrc;
616 mr->mr_is_fmr = 1;
617 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
618 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
619 mr->mr_mttaddr = mtt_addr;
620 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));
621
622 /* initialize hr_addr for use during register/deregister/invalidate */
623 icm_table = &state->hs_icm[HERMON_DMPT];
624 rindx = mpt->hr_indx;
625 hermon_index(index1, index2, rindx, icm_table, i);
626 dma_info = icm_table->icm_dma[index1] + index2;
627 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
628 mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));
629
630 *mrhdl = mr;
631
632 return (DDI_SUCCESS);
633
634 /*
635 * The following is cleanup for all possible failure cases in this routine
636 */
637 fmralloc_fail4:
638 kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
639 fmralloc_fail3:
640 hermon_rsrc_free(state, &rsrc);
641 fmralloc_fail2:
642 hermon_rsrc_free(state, &mpt);
643 fmralloc_fail1:
644 hermon_pd_refcnt_dec(pd);
645 return (status);
646 }
647
648
649 /*
650 * hermon_mr_register_physical_fmr()
651 * Context: Can be called from interrupt or base context.
652 */
653 /*ARGSUSED*/
654 int
hermon_mr_register_physical_fmr(hermon_state_t * state,ibt_pmr_attr_t * mem_pattr_p,hermon_mrhdl_t mr,ibt_pmr_desc_t * mem_desc_p)655 hermon_mr_register_physical_fmr(hermon_state_t *state,
656 ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
657 {
658 hermon_rsrc_t *mpt;
659 uint64_t *mpt_table;
660 int status;
661 uint32_t key;
662
663 mutex_enter(&mr->mr_lock);
664 mpt = mr->mr_mptrsrcp;
665 mpt_table = (uint64_t *)mpt->hr_addr;
666
667 /* Write MPT status to SW bit */
668 *(uint8_t *)mpt_table = 0xF0;
669
670 membar_producer();
671
672 /*
673 * Write the mapped addresses into the MTT entries. FMR needs to do
674 * this a little differently, so we call the fmr specific fast mtt
675 * write here.
676 */
677 status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
678 mem_pattr_p, mr->mr_logmttpgsz);
679 if (status != DDI_SUCCESS) {
680 mutex_exit(&mr->mr_lock);
681 status = ibc_get_ci_failure(0);
682 goto fmr_reg_fail1;
683 }
684
685 /*
686 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
687 * from a certain number of "constrained" bits (the least significant
688 * bits) and some number of "unconstrained" bits. The constrained
689 * bits must be set to the index of the entry in the MPT table, but
690 * the unconstrained bits can be set to any value we wish. Note:
691 * if no remote access is required, then the RKey value is not filled
692 * in. Otherwise both Rkey and LKey are given the same value.
693 */
694 key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
695 mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);
696
697 /* write mem key value */
698 *(uint32_t *)&mpt_table[1] = htonl(key);
699
700 /* write length value */
701 mpt_table[3] = htonll(mem_pattr_p->pmr_len);
702
703 /* write start addr value */
704 mpt_table[2] = htonll(mem_pattr_p->pmr_iova);
705
706 /* write lkey value */
707 *(uint32_t *)&mpt_table[4] = htonl(key);
708
709 membar_producer();
710
711 /* Write MPT status to HW bit */
712 *(uint8_t *)mpt_table = 0x00;
713
714 /* Fill in return parameters */
715 mem_desc_p->pmd_lkey = mr->mr_lkey;
716 mem_desc_p->pmd_rkey = mr->mr_rkey;
717 mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
718 mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;
719
720 /* Fill in MR bindinfo struct for later sync or query operations */
721 mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
722 mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;
723
724 mutex_exit(&mr->mr_lock);
725
726 return (DDI_SUCCESS);
727
728 fmr_reg_fail1:
729 /*
730 * Note, we fail here, and purposely leave the memory ownership in
731 * software. The memory tables may be corrupt, so we leave the region
732 * unregistered.
733 */
734 return (status);
735 }
736
737
738 /*
739 * hermon_mr_deregister()
740 * Context: Can be called from interrupt or base context.
741 */
742 /* ARGSUSED */
743 int
hermon_mr_deregister(hermon_state_t * state,hermon_mrhdl_t * mrhdl,uint_t level,uint_t sleep)744 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
745 uint_t sleep)
746 {
747 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
748 hermon_umap_db_entry_t *umapdb;
749 hermon_pdhdl_t pd;
750 hermon_mrhdl_t mr;
751 hermon_bind_info_t *bind;
752 uint64_t value;
753 int status;
754 uint_t shared_mtt;
755
756 /*
757 * Check the sleep flag. Ensure that it is consistent with the
758 * current thread context (i.e. if we are currently in the interrupt
759 * context, then we shouldn't be attempting to sleep).
760 */
761 if ((sleep == HERMON_SLEEP) &&
762 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
763 status = IBT_INVALID_PARAM;
764 return (status);
765 }
766
767 /*
768 * Pull all the necessary information from the Hermon Memory Region
769 * handle. This is necessary here because the resource for the
770 * MR handle is going to be freed up as part of the this
771 * deregistration
772 */
773 mr = *mrhdl;
774 mutex_enter(&mr->mr_lock);
775 mpt = mr->mr_mptrsrcp;
776 mtt = mr->mr_mttrsrcp;
777 mtt_refcnt = mr->mr_mttrefcntp;
778 rsrc = mr->mr_rsrcp;
779 pd = mr->mr_pdhdl;
780 bind = &mr->mr_bindinfo;
781
782 /*
783 * Check here if the memory region is really an FMR. If so, this is a
784 * bad thing and we shouldn't be here. Return failure.
785 */
786 if (mr->mr_is_fmr) {
787 mutex_exit(&mr->mr_lock);
788 return (IBT_INVALID_PARAM);
789 }
790
791 /*
792 * Check here to see if the memory region has already been partially
793 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
794 * If so, then jump to the end and free the remaining resources.
795 */
796 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
797 goto mrdereg_finish_cleanup;
798 }
799 if (hermon_rdma_debug & 0x4)
800 IBTF_DPRINTF_L2("mr", "dereg: mr %p key %x",
801 mr, mr->mr_rkey);
802
803 /*
804 * We must drop the "mr_lock" here to ensure that both SLEEP and
805 * NOSLEEP calls into the firmware work as expected. Also, if two
806 * threads are attemping to access this MR (via de-register,
807 * re-register, or otherwise), then we allow the firmware to enforce
808 * the checking, that only one deregister is valid.
809 */
810 mutex_exit(&mr->mr_lock);
811
812 /*
813 * Reclaim MPT entry from hardware (if necessary). Since the
814 * hermon_mr_deregister() routine is used in the memory region
815 * reregistration process as well, it is possible that we will
816 * not always wish to reclaim ownership of the MPT. Check the
817 * "level" arg and, if necessary, attempt to reclaim it. If
818 * the ownership transfer fails for any reason, we check to see
819 * what command status was returned from the hardware. The only
820 * "expected" error status is the one that indicates an attempt to
821 * deregister a memory region that has memory windows bound to it
822 */
823 if (level >= HERMON_MR_DEREG_ALL) {
824 if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
825 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
826 NULL, 0, mpt->hr_indx, sleep);
827 if (status != HERMON_CMD_SUCCESS) {
828 if (status == HERMON_CMD_REG_BOUND) {
829 return (IBT_MR_IN_USE);
830 } else {
831 cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
832 "command failed: %08x\n", status);
833 if (status ==
834 HERMON_CMD_INVALID_STATUS) {
835 hermon_fm_ereport(state,
836 HCA_SYS_ERR,
837 DDI_SERVICE_LOST);
838 }
839 return (IBT_INVALID_PARAM);
840 }
841 }
842 }
843 }
844
845 /*
846 * Re-grab the mr_lock here. Since further access to the protected
847 * 'mr' structure is needed, and we would have returned previously for
848 * the multiple deregistration case, we can safely grab the lock here.
849 */
850 mutex_enter(&mr->mr_lock);
851
852 /*
853 * If the memory had come from userland, then we do a lookup in the
854 * "userland resources database". On success, we free the entry, call
855 * ddi_umem_unlock(), and continue the cleanup. On failure (which is
856 * an indication that the umem_lockmemory() callback has called
857 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
858 * the "mr_umemcookie" field in the MR handle (this will be used
859 * later to detect that only partial cleaup still remains to be done
860 * on the MR handle).
861 */
862 if (mr->mr_is_umem) {
863 status = hermon_umap_db_find(state->hs_instance,
864 (uint64_t)(uintptr_t)mr->mr_umemcookie,
865 MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
866 &umapdb);
867 if (status == DDI_SUCCESS) {
868 hermon_umap_db_free(umapdb);
869 ddi_umem_unlock(mr->mr_umemcookie);
870 } else {
871 ddi_umem_unlock(mr->mr_umemcookie);
872 mr->mr_umemcookie = NULL;
873 }
874 }
875
876 /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
877 if (mtt_refcnt != NULL) {
878 /*
879 * Decrement the MTT reference count. Since the MTT resource
880 * may be shared between multiple memory regions (as a result
881 * of a "RegisterSharedMR" verb) it is important that we not
882 * free up or unbind resources prematurely. If it's not shared
883 * (as indicated by the return status), then free the resource.
884 */
885 shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
886 if (!shared_mtt) {
887 hermon_rsrc_free(state, &mtt_refcnt);
888 }
889
890 /*
891 * Free up the MTT entries and unbind the memory. Here,
892 * as above, we attempt to free these resources only if
893 * it is appropriate to do so.
894 * Note, 'bind' is NULL in the alloc_lkey case.
895 */
896 if (!shared_mtt) {
897 if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
898 hermon_mr_mem_unbind(state, bind);
899 }
900 hermon_rsrc_free(state, &mtt);
901 }
902 }
903
904 /*
905 * If the MR handle has been invalidated, then drop the
906 * lock and return success. Note: This only happens because
907 * the umem_lockmemory() callback has been triggered. The
908 * cleanup here is partial, and further cleanup (in a
909 * subsequent hermon_mr_deregister() call) will be necessary.
910 */
911 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
912 mutex_exit(&mr->mr_lock);
913 return (DDI_SUCCESS);
914 }
915
916 mrdereg_finish_cleanup:
917 mutex_exit(&mr->mr_lock);
918
919 /* Free the Hermon Memory Region handle */
920 hermon_rsrc_free(state, &rsrc);
921
922 /* Free up the MPT entry resource */
923 if (mpt != NULL)
924 hermon_rsrc_free(state, &mpt);
925
926 /* Decrement the reference count on the protection domain (PD) */
927 hermon_pd_refcnt_dec(pd);
928
929 /* Set the mrhdl pointer to NULL and return success */
930 *mrhdl = NULL;
931
932 return (DDI_SUCCESS);
933 }
934
935 /*
936 * hermon_mr_dealloc_fmr()
937 * Context: Can be called from interrupt or base context.
938 */
939 /* ARGSUSED */
940 int
hermon_mr_dealloc_fmr(hermon_state_t * state,hermon_mrhdl_t * mrhdl)941 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
942 {
943 hermon_rsrc_t *mpt, *mtt, *rsrc;
944 hermon_pdhdl_t pd;
945 hermon_mrhdl_t mr;
946
947 /*
948 * Pull all the necessary information from the Hermon Memory Region
949 * handle. This is necessary here because the resource for the
950 * MR handle is going to be freed up as part of the this
951 * deregistration
952 */
953 mr = *mrhdl;
954 mutex_enter(&mr->mr_lock);
955 mpt = mr->mr_mptrsrcp;
956 mtt = mr->mr_mttrsrcp;
957 rsrc = mr->mr_rsrcp;
958 pd = mr->mr_pdhdl;
959 mutex_exit(&mr->mr_lock);
960
961 /* Free the MTT entries */
962 hermon_rsrc_free(state, &mtt);
963
964 /* Free the Hermon Memory Region handle */
965 hermon_rsrc_free(state, &rsrc);
966
967 /* Free up the MPT entry resource */
968 hermon_rsrc_free(state, &mpt);
969
970 /* Decrement the reference count on the protection domain (PD) */
971 hermon_pd_refcnt_dec(pd);
972
973 /* Set the mrhdl pointer to NULL and return success */
974 *mrhdl = NULL;
975
976 return (DDI_SUCCESS);
977 }
978
979
980 /*
981 * hermon_mr_query()
982 * Context: Can be called from interrupt or base context.
983 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	int			status;
	hermon_hw_dmpt_t	mpt_entry;
	uint32_t		lkey;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/*
	 * Query the current MPT entry from hardware.  The MPT index is the
	 * upper 24 bits of the driver-format lkey (hence the ">> 8").
	 * NOSLEEP is used because this can run in interrupt context.
	 */
	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
	    HERMON_NOSLEEP);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
		mutex_exit(&mr->mr_lock);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Update the mr sw struct from the hw struct.  The 8-bit rotate
	 * below converts the queried mem_key back into the driver's
	 * memory-key format (see hermon_mr_key_swap() for the two layouts).
	 */
	lkey = mpt_entry.mem_key;
	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
	/* Reassemble the 64-bit MTT address (mtt_addr_l holds addr >> 3) */
	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
	    (mpt_entry.mtt_addr_l << 3);
	mr->mr_logmttpgsz = mpt_entry.entity_sz;

	/* Fill in the queried attributes */
	attr->mr_lkey_state =
	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
	    IBT_KEY_VALID;
	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}
1068
1069
1070 /*
1071 * hermon_mr_reregister()
1072 * Context: Can be called from interrupt or base context.
1073 */
1074 int
hermon_mr_reregister(hermon_state_t * state,hermon_mrhdl_t mr,hermon_pdhdl_t pd,ibt_mr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl_new,hermon_mr_options_t * op)1075 hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1076 hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1077 hermon_mr_options_t *op)
1078 {
1079 hermon_bind_info_t bind;
1080 int status;
1081
1082 /*
1083 * Fill in the "bind" struct. This struct provides the majority
1084 * of the information that will be used to distinguish between an
1085 * "addr" binding (as is the case here) and a "buf" binding (see
1086 * below). The "bind" struct is later passed to hermon_mr_mem_bind()
1087 * which does most of the "heavy lifting" for the Hermon memory
1088 * registration (and reregistration) routines.
1089 */
1090 bind.bi_type = HERMON_BINDHDL_VADDR;
1091 bind.bi_addr = mr_attr->mr_vaddr;
1092 bind.bi_len = mr_attr->mr_len;
1093 bind.bi_as = mr_attr->mr_as;
1094 bind.bi_flags = mr_attr->mr_flags;
1095 status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1096 return (status);
1097 }
1098
1099
1100 /*
1101 * hermon_mr_reregister_buf()
1102 * Context: Can be called from interrupt or base context.
1103 */
1104 int
hermon_mr_reregister_buf(hermon_state_t * state,hermon_mrhdl_t mr,hermon_pdhdl_t pd,ibt_smr_attr_t * mr_attr,struct buf * buf,hermon_mrhdl_t * mrhdl_new,hermon_mr_options_t * op)1105 hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1106 hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1107 hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1108 {
1109 hermon_bind_info_t bind;
1110 int status;
1111
1112 /*
1113 * Fill in the "bind" struct. This struct provides the majority
1114 * of the information that will be used to distinguish between an
1115 * "addr" binding (see above) and a "buf" binding (as is the case
1116 * here). The "bind" struct is later passed to hermon_mr_mem_bind()
1117 * which does most of the "heavy lifting" for the Hermon memory
1118 * registration routines. Note: We have chosen to provide
1119 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1120 * not set). It is not critical what value we choose here as it need
1121 * only be unique for the given RKey (which will happen by default),
1122 * so the choice here is somewhat arbitrary.
1123 */
1124 bind.bi_type = HERMON_BINDHDL_BUF;
1125 bind.bi_buf = buf;
1126 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1127 bind.bi_addr = mr_attr->mr_vaddr;
1128 } else {
1129 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1130 }
1131 bind.bi_len = (uint64_t)buf->b_bcount;
1132 bind.bi_flags = mr_attr->mr_flags;
1133 bind.bi_as = NULL;
1134 status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1135 return (status);
1136 }
1137
1138
1139 /*
1140 * hermon_mr_sync()
1141 * Context: Can be called from interrupt or base context.
1142 */
1143 /* ARGSUSED */
1144 int
hermon_mr_sync(hermon_state_t * state,ibt_mr_sync_t * mr_segs,size_t num_segs)1145 hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1146 {
1147 hermon_mrhdl_t mrhdl;
1148 uint64_t seg_vaddr, seg_len, seg_end;
1149 uint64_t mr_start, mr_end;
1150 uint_t type;
1151 int status, i;
1152
1153 /* Process each of the ibt_mr_sync_t's */
1154 for (i = 0; i < num_segs; i++) {
1155 mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1156
1157 /* Check for valid memory region handle */
1158 if (mrhdl == NULL) {
1159 status = IBT_MR_HDL_INVALID;
1160 goto mrsync_fail;
1161 }
1162
1163 mutex_enter(&mrhdl->mr_lock);
1164
1165 /*
1166 * Check here to see if the memory region has already been
1167 * partially deregistered as a result of a
1168 * hermon_umap_umemlock_cb() callback. If so, this is an
1169 * error, return failure.
1170 */
1171 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1172 mutex_exit(&mrhdl->mr_lock);
1173 status = IBT_MR_HDL_INVALID;
1174 goto mrsync_fail;
1175 }
1176
1177 /* Check for valid bounds on sync request */
1178 seg_vaddr = mr_segs[i].ms_vaddr;
1179 seg_len = mr_segs[i].ms_len;
1180 seg_end = seg_vaddr + seg_len - 1;
1181 mr_start = mrhdl->mr_bindinfo.bi_addr;
1182 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1183 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1184 mutex_exit(&mrhdl->mr_lock);
1185 status = IBT_MR_VA_INVALID;
1186 goto mrsync_fail;
1187 }
1188 if ((seg_end < mr_start) || (seg_end > mr_end)) {
1189 mutex_exit(&mrhdl->mr_lock);
1190 status = IBT_MR_LEN_INVALID;
1191 goto mrsync_fail;
1192 }
1193
1194 /* Determine what type (i.e. direction) for sync */
1195 if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1196 type = DDI_DMA_SYNC_FORDEV;
1197 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1198 type = DDI_DMA_SYNC_FORCPU;
1199 } else {
1200 mutex_exit(&mrhdl->mr_lock);
1201 status = IBT_INVALID_PARAM;
1202 goto mrsync_fail;
1203 }
1204
1205 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1206 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1207
1208 mutex_exit(&mrhdl->mr_lock);
1209 }
1210
1211 return (DDI_SUCCESS);
1212
1213 mrsync_fail:
1214 return (status);
1215 }
1216
1217
1218 /*
1219 * hermon_mw_alloc()
1220 * Context: Can be called from interrupt or base context.
1221 */
1222 int
hermon_mw_alloc(hermon_state_t * state,hermon_pdhdl_t pd,ibt_mw_flags_t flags,hermon_mwhdl_t * mwhdl)1223 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
1224 hermon_mwhdl_t *mwhdl)
1225 {
1226 hermon_rsrc_t *mpt, *rsrc;
1227 hermon_hw_dmpt_t mpt_entry;
1228 hermon_mwhdl_t mw;
1229 uint_t sleep;
1230 int status;
1231
1232 if (state != NULL) /* XXX - bogus test that is always TRUE */
1233 return (IBT_INSUFF_RESOURCE);
1234
1235 /*
1236 * Check the sleep flag. Ensure that it is consistent with the
1237 * current thread context (i.e. if we are currently in the interrupt
1238 * context, then we shouldn't be attempting to sleep).
1239 */
1240 sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
1241 if ((sleep == HERMON_SLEEP) &&
1242 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1243 status = IBT_INVALID_PARAM;
1244 goto mwalloc_fail;
1245 }
1246
1247 /* Increment the reference count on the protection domain (PD) */
1248 hermon_pd_refcnt_inc(pd);
1249
1250 /*
1251 * Allocate an MPT entry (for use as a memory window). Since the
1252 * Hermon hardware uses the MPT entry for memory regions and for
1253 * memory windows, we will fill in this MPT with all the necessary
1254 * parameters for the memory window. And then (just as we do for
1255 * memory regions) ownership will be passed to the hardware in the
1256 * final step below. If we fail here, we must undo the protection
1257 * domain reference count.
1258 */
1259 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1260 if (status != DDI_SUCCESS) {
1261 status = IBT_INSUFF_RESOURCE;
1262 goto mwalloc_fail1;
1263 }
1264
1265 /*
1266 * Allocate the software structure for tracking the memory window (i.e.
1267 * the Hermon Memory Window handle). Note: This is actually the same
1268 * software structure used for tracking memory regions, but since many
1269 * of the same properties are needed, only a single structure is
1270 * necessary. If we fail here, we must undo the protection domain
1271 * reference count and the previous resource allocation.
1272 */
1273 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1274 if (status != DDI_SUCCESS) {
1275 status = IBT_INSUFF_RESOURCE;
1276 goto mwalloc_fail2;
1277 }
1278 mw = (hermon_mwhdl_t)rsrc->hr_addr;
1279 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1280
1281 /*
1282 * Calculate an "unbound" RKey from MPT index. In much the same way
1283 * as we do for memory regions (above), this key is constructed from
1284 * a "constrained" (which depends on the MPT index) and an
1285 * "unconstrained" portion (which may be arbitrarily chosen).
1286 */
1287 mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);
1288
1289 /*
1290 * Fill in the MPT entry. This is the final step before passing
1291 * ownership of the MPT entry to the Hermon hardware. We use all of
1292 * the information collected/calculated above to fill in the
1293 * requisite portions of the MPT. Note: fewer entries in the MPT
1294 * entry are necessary to allocate a memory window.
1295 */
1296 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1297 mpt_entry.reg_win = HERMON_MPT_IS_WINDOW;
1298 mpt_entry.mem_key = mw->mr_rkey;
1299 mpt_entry.pd = pd->pd_pdnum;
1300 mpt_entry.lr = 1;
1301
1302 /*
1303 * Write the MPT entry to hardware. Lastly, we pass ownership of
1304 * the entry to the hardware. Note: in general, this operation
1305 * shouldn't fail. But if it does, we have to undo everything we've
1306 * done above before returning error.
1307 */
1308 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1309 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1310 if (status != HERMON_CMD_SUCCESS) {
1311 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1312 status);
1313 if (status == HERMON_CMD_INVALID_STATUS) {
1314 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1315 }
1316 status = ibc_get_ci_failure(0);
1317 goto mwalloc_fail3;
1318 }
1319
1320 /*
1321 * Fill in the rest of the Hermon Memory Window handle. Having
1322 * successfully transferred ownership of the MPT, we can update the
1323 * following fields for use in further operations on the MW.
1324 */
1325 mw->mr_mptrsrcp = mpt;
1326 mw->mr_pdhdl = pd;
1327 mw->mr_rsrcp = rsrc;
1328 mw->mr_rkey = hermon_mr_key_swap(mw->mr_rkey);
1329 *mwhdl = mw;
1330
1331 return (DDI_SUCCESS);
1332
1333 mwalloc_fail3:
1334 hermon_rsrc_free(state, &rsrc);
1335 mwalloc_fail2:
1336 hermon_rsrc_free(state, &mpt);
1337 mwalloc_fail1:
1338 hermon_pd_refcnt_dec(pd);
1339 mwalloc_fail:
1340 return (status);
1341 }
1342
1343
1344 /*
1345 * hermon_mw_free()
1346 * Context: Can be called from interrupt or base context.
1347 */
1348 int
hermon_mw_free(hermon_state_t * state,hermon_mwhdl_t * mwhdl,uint_t sleep)1349 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1350 {
1351 hermon_rsrc_t *mpt, *rsrc;
1352 hermon_mwhdl_t mw;
1353 int status;
1354 hermon_pdhdl_t pd;
1355
1356 /*
1357 * Check the sleep flag. Ensure that it is consistent with the
1358 * current thread context (i.e. if we are currently in the interrupt
1359 * context, then we shouldn't be attempting to sleep).
1360 */
1361 if ((sleep == HERMON_SLEEP) &&
1362 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1363 status = IBT_INVALID_PARAM;
1364 return (status);
1365 }
1366
1367 /*
1368 * Pull all the necessary information from the Hermon Memory Window
1369 * handle. This is necessary here because the resource for the
1370 * MW handle is going to be freed up as part of the this operation.
1371 */
1372 mw = *mwhdl;
1373 mutex_enter(&mw->mr_lock);
1374 mpt = mw->mr_mptrsrcp;
1375 rsrc = mw->mr_rsrcp;
1376 pd = mw->mr_pdhdl;
1377 mutex_exit(&mw->mr_lock);
1378 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1379
1380 /*
1381 * Reclaim the MPT entry from hardware. Note: in general, it is
1382 * unexpected for this operation to return an error.
1383 */
1384 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1385 0, mpt->hr_indx, sleep);
1386 if (status != HERMON_CMD_SUCCESS) {
1387 cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1388 status);
1389 if (status == HERMON_CMD_INVALID_STATUS) {
1390 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1391 }
1392 return (ibc_get_ci_failure(0));
1393 }
1394
1395 /* Free the Hermon Memory Window handle */
1396 hermon_rsrc_free(state, &rsrc);
1397
1398 /* Free up the MPT entry resource */
1399 hermon_rsrc_free(state, &mpt);
1400
1401 /* Decrement the reference count on the protection domain (PD) */
1402 hermon_pd_refcnt_dec(pd);
1403
1404 /* Set the mwhdl pointer to NULL and return success */
1405 *mwhdl = NULL;
1406
1407 return (DDI_SUCCESS);
1408 }
1409
1410
1411 /*
1412 * hermon_mr_keycalc()
1413 * Context: Can be called from interrupt or base context.
1414 * NOTE: Produces a key in the form of
1415 * KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1416 * where K == the arbitrary bits and I == the index
1417 */
1418 uint32_t
hermon_mr_keycalc(uint32_t indx)1419 hermon_mr_keycalc(uint32_t indx)
1420 {
1421 uint32_t tmp_key, tmp_indx;
1422
1423 /*
1424 * Generate a simple key from counter. Note: We increment this
1425 * static variable _intentionally_ without any kind of mutex around
1426 * it. First, single-threading all operations through a single lock
1427 * would be a bad idea (from a performance point-of-view). Second,
1428 * the upper "unconstrained" bits don't really have to be unique
1429 * because the lower bits are guaranteed to be (although we do make a
1430 * best effort to ensure that they are). Third, the window for the
1431 * race (where both threads read and update the counter at the same
1432 * time) is incredibly small.
1433 * And, lastly, we'd like to make this into a "random" key
1434 */
1435 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
1436 tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1437 tmp_indx = indx & 0xffffff;
1438 return (tmp_key | tmp_indx);
1439 }
1440
1441
1442 /*
1443 * hermon_mr_key_swap()
1444 * Context: Can be called from interrupt or base context.
1445 * NOTE: Produces a key in the form of
1446 * IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
1447 * where K == the arbitrary bits and I == the index
1448 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The memory key format to pass down to the hardware is
	 * (key[7:0],index[23:0]), which defines the index to the
	 * hardware resource.  When the driver passes this as a memory
	 * key (i.e. to retrieve a resource) the format is
	 * (index[23:0],key[7:0]).  An 8-bit rotation converts between
	 * the two layouts; the shifts below are well-defined on the
	 * 32-bit unsigned operand.
	 */
	return ((indx << 8) | (indx >> 24));
}
1461
1462 /*
1463 * hermon_mr_common_reg()
1464 * Context: Can be called from interrupt or base context.
1465 */
1466 static int
hermon_mr_common_reg(hermon_state_t * state,hermon_pdhdl_t pd,hermon_bind_info_t * bind,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)1467 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1468 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1469 hermon_mpt_rsrc_type_t mpt_type)
1470 {
1471 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
1472 hermon_umap_db_entry_t *umapdb;
1473 hermon_sw_refcnt_t *swrc_tmp;
1474 hermon_hw_dmpt_t mpt_entry;
1475 hermon_mrhdl_t mr;
1476 ibt_mr_flags_t flags;
1477 hermon_bind_info_t *bh;
1478 ddi_dma_handle_t bind_dmahdl;
1479 ddi_umem_cookie_t umem_cookie;
1480 size_t umem_len;
1481 caddr_t umem_addr;
1482 uint64_t mtt_addr, max_sz;
1483 uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1484 int status, umem_flags, bind_override_addr;
1485
1486 /*
1487 * Check the "options" flag. Currently this flag tells the driver
1488 * whether or not the region should be bound normally (i.e. with
1489 * entries written into the PCI IOMMU), whether it should be
1490 * registered to bypass the IOMMU, and whether or not the resulting
1491 * address should be "zero-based" (to aid the alignment restrictions
1492 * for QPs).
1493 */
1494 if (op == NULL) {
1495 bind_type = HERMON_BINDMEM_NORMAL;
1496 bind_dmahdl = NULL;
1497 bind_override_addr = 0;
1498 } else {
1499 bind_type = op->mro_bind_type;
1500 bind_dmahdl = op->mro_bind_dmahdl;
1501 bind_override_addr = op->mro_bind_override_addr;
1502 }
1503
1504 /* check what kind of mpt to use */
1505
1506 /* Extract the flags field from the hermon_bind_info_t */
1507 flags = bind->bi_flags;
1508
1509 /*
1510 * Check for invalid length. Check is the length is zero or if the
1511 * length is larger than the maximum configured value. Return error
1512 * if it is.
1513 */
1514 max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1515 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1516 status = IBT_MR_LEN_INVALID;
1517 goto mrcommon_fail;
1518 }
1519
1520 /*
1521 * Check the sleep flag. Ensure that it is consistent with the
1522 * current thread context (i.e. if we are currently in the interrupt
1523 * context, then we shouldn't be attempting to sleep).
1524 */
1525 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1526 if ((sleep == HERMON_SLEEP) &&
1527 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1528 status = IBT_INVALID_PARAM;
1529 goto mrcommon_fail;
1530 }
1531
1532 /* Increment the reference count on the protection domain (PD) */
1533 hermon_pd_refcnt_inc(pd);
1534
1535 /*
1536 * Allocate an MPT entry. This will be filled in with all the
1537 * necessary parameters to define the memory region. And then
1538 * ownership will be passed to the hardware in the final step
1539 * below. If we fail here, we must undo the protection domain
1540 * reference count.
1541 */
1542 if (mpt_type == HERMON_MPT_DMPT) {
1543 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1544 if (status != DDI_SUCCESS) {
1545 status = IBT_INSUFF_RESOURCE;
1546 goto mrcommon_fail1;
1547 }
1548 } else {
1549 mpt = NULL;
1550 }
1551
1552 /*
1553 * Allocate the software structure for tracking the memory region (i.e.
1554 * the Hermon Memory Region handle). If we fail here, we must undo
1555 * the protection domain reference count and the previous resource
1556 * allocation.
1557 */
1558 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1559 if (status != DDI_SUCCESS) {
1560 status = IBT_INSUFF_RESOURCE;
1561 goto mrcommon_fail2;
1562 }
1563 mr = (hermon_mrhdl_t)rsrc->hr_addr;
1564 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1565
1566 /*
1567 * Setup and validate the memory region access flags. This means
1568 * translating the IBTF's enable flags into the access flags that
1569 * will be used in later operations.
1570 */
1571 mr->mr_accflag = 0;
1572 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1573 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1574 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1575 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1576 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1577 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1578 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1579 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1580 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1581 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1582
1583 /*
1584 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1585 * from a certain number of "constrained" bits (the least significant
1586 * bits) and some number of "unconstrained" bits. The constrained
1587 * bits must be set to the index of the entry in the MPT table, but
1588 * the unconstrained bits can be set to any value we wish. Note:
1589 * if no remote access is required, then the RKey value is not filled
1590 * in. Otherwise both Rkey and LKey are given the same value.
1591 */
1592 if (mpt)
1593 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1594
1595 /*
1596 * Determine if the memory is from userland and pin the pages
1597 * with umem_lockmemory() if necessary.
1598 * Then, if this is userland memory, allocate an entry in the
1599 * "userland resources database". This will later be added to
1600 * the database (after all further memory registration operations are
1601 * successful). If we fail here, we must undo the reference counts
1602 * and the previous resource allocations.
1603 */
1604 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1605 if (mr_is_umem) {
1606 umem_len = ptob(btopr(bind->bi_len +
1607 ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1608 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1609 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1610 DDI_UMEMLOCK_LONGTERM);
1611 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1612 &umem_cookie, &hermon_umem_cbops, NULL);
1613 if (status != 0) {
1614 status = IBT_INSUFF_RESOURCE;
1615 goto mrcommon_fail3;
1616 }
1617
1618 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1619 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1620
1621 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1622 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1623 if (bind->bi_buf == NULL) {
1624 status = IBT_INSUFF_RESOURCE;
1625 goto mrcommon_fail3;
1626 }
1627 bind->bi_type = HERMON_BINDHDL_UBUF;
1628 bind->bi_buf->b_flags |= B_READ;
1629
1630 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1631 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1632
1633 umapdb = hermon_umap_db_alloc(state->hs_instance,
1634 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1635 (uint64_t)(uintptr_t)rsrc);
1636 if (umapdb == NULL) {
1637 status = IBT_INSUFF_RESOURCE;
1638 goto mrcommon_fail4;
1639 }
1640 }
1641
1642 /*
1643 * Setup the bindinfo for the mtt bind call
1644 */
1645 bh = &mr->mr_bindinfo;
1646 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1647 bcopy(bind, bh, sizeof (hermon_bind_info_t));
1648 bh->bi_bypass = bind_type;
1649 status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1650 &mtt_pgsize_bits, mpt != NULL);
1651 if (status != DDI_SUCCESS) {
1652 /*
1653 * When mtt_bind fails, freerbuf has already been done,
1654 * so make sure not to call it again.
1655 */
1656 bind->bi_type = bh->bi_type;
1657 goto mrcommon_fail5;
1658 }
1659 mr->mr_logmttpgsz = mtt_pgsize_bits;
1660
1661 /*
1662 * Allocate MTT reference count (to track shared memory regions).
1663 * This reference count resource may never be used on the given
1664 * memory region, but if it is ever later registered as "shared"
1665 * memory region then this resource will be necessary. If we fail
1666 * here, we do pretty much the same as above to clean up.
1667 */
1668 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1669 &mtt_refcnt);
1670 if (status != DDI_SUCCESS) {
1671 status = IBT_INSUFF_RESOURCE;
1672 goto mrcommon_fail6;
1673 }
1674 mr->mr_mttrefcntp = mtt_refcnt;
1675 swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1676 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1677 HERMON_MTT_REFCNT_INIT(swrc_tmp);
1678
1679 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1680
1681 /*
1682 * Fill in the MPT entry. This is the final step before passing
1683 * ownership of the MPT entry to the Hermon hardware. We use all of
1684 * the information collected/calculated above to fill in the
1685 * requisite portions of the MPT. Do this ONLY for DMPTs.
1686 */
1687 if (mpt == NULL)
1688 goto no_passown;
1689
1690 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1691
1692 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
1693 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1694 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1695 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1696 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1697 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1698 mpt_entry.lr = 1;
1699 mpt_entry.phys_addr = 0;
1700 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1701
1702 mpt_entry.entity_sz = mr->mr_logmttpgsz;
1703 mpt_entry.mem_key = mr->mr_lkey;
1704 mpt_entry.pd = pd->pd_pdnum;
1705 mpt_entry.rem_acc_en = 0;
1706 mpt_entry.fast_reg_en = 0;
1707 mpt_entry.en_inval = 0;
1708 mpt_entry.lkey = 0;
1709 mpt_entry.win_cnt = 0;
1710
1711 if (bind_override_addr == 0) {
1712 mpt_entry.start_addr = bh->bi_addr;
1713 } else {
1714 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1715 mpt_entry.start_addr = bh->bi_addr;
1716 }
1717 mpt_entry.reg_win_len = bh->bi_len;
1718
1719 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
1720 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
1721
1722 /*
1723 * Write the MPT entry to hardware. Lastly, we pass ownership of
1724 * the entry to the hardware if needed. Note: in general, this
1725 * operation shouldn't fail. But if it does, we have to undo
1726 * everything we've done above before returning error.
1727 *
1728 * For Hermon, this routine (which is common to the contexts) will only
1729 * set the ownership if needed - the process of passing the context
1730 * itself to HW will take care of setting up the MPT (based on type
1731 * and index).
1732 */
1733
1734 mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */
1735 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1736 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1737 if (status != HERMON_CMD_SUCCESS) {
1738 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1739 status);
1740 if (status == HERMON_CMD_INVALID_STATUS) {
1741 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1742 }
1743 status = ibc_get_ci_failure(0);
1744 goto mrcommon_fail7;
1745 }
1746 if (hermon_rdma_debug & 0x4)
1747 IBTF_DPRINTF_L2("mr", " reg: mr %p key %x",
1748 mr, hermon_mr_key_swap(mr->mr_rkey));
1749 no_passown:
1750
1751 /*
1752 * Fill in the rest of the Hermon Memory Region handle. Having
1753 * successfully transferred ownership of the MPT, we can update the
1754 * following fields for use in further operations on the MR.
1755 */
1756 mr->mr_mttaddr = mtt_addr;
1757
1758 mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1759 mr->mr_mptrsrcp = mpt;
1760 mr->mr_mttrsrcp = mtt;
1761 mr->mr_pdhdl = pd;
1762 mr->mr_rsrcp = rsrc;
1763 mr->mr_is_umem = mr_is_umem;
1764 mr->mr_is_fmr = 0;
1765 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
1766 mr->mr_umem_cbfunc = NULL;
1767 mr->mr_umem_cbarg1 = NULL;
1768 mr->mr_umem_cbarg2 = NULL;
1769 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
1770 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
1771 mr->mr_mpt_type = mpt_type;
1772
1773 /*
1774 * If this is userland memory, then we need to insert the previously
1775 * allocated entry into the "userland resources database". This will
1776 * allow for later coordination between the hermon_umap_umemlock_cb()
1777 * callback and hermon_mr_deregister().
1778 */
1779 if (mr_is_umem) {
1780 hermon_umap_db_add(umapdb);
1781 }
1782
1783 *mrhdl = mr;
1784
1785 return (DDI_SUCCESS);
1786
1787 /*
1788 * The following is cleanup for all possible failure cases in this routine
1789 */
1790 mrcommon_fail7:
1791 hermon_rsrc_free(state, &mtt_refcnt);
1792 mrcommon_fail6:
1793 hermon_mr_mem_unbind(state, bh);
1794 bind->bi_type = bh->bi_type;
1795 mrcommon_fail5:
1796 if (mr_is_umem) {
1797 hermon_umap_db_free(umapdb);
1798 }
1799 mrcommon_fail4:
1800 if (mr_is_umem) {
1801 /*
1802 * Free up the memory ddi_umem_iosetup() allocates
1803 * internally.
1804 */
1805 if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1806 freerbuf(bind->bi_buf);
1807 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1808 bind->bi_type = HERMON_BINDHDL_NONE;
1809 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1810 }
1811 ddi_umem_unlock(umem_cookie);
1812 }
1813 mrcommon_fail3:
1814 hermon_rsrc_free(state, &rsrc);
1815 mrcommon_fail2:
1816 if (mpt != NULL)
1817 hermon_rsrc_free(state, &mpt);
1818 mrcommon_fail1:
1819 hermon_pd_refcnt_dec(pd);
1820 mrcommon_fail:
1821 return (status);
1822 }
1823
1824 /*
1825 * hermon_dma_mr_register()
1826 * Context: Can be called from base context.
1827 */
1828 int
hermon_dma_mr_register(hermon_state_t * state,hermon_pdhdl_t pd,ibt_dmr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl)1829 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1830 ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1831 {
1832 hermon_rsrc_t *mpt, *rsrc;
1833 hermon_hw_dmpt_t mpt_entry;
1834 hermon_mrhdl_t mr;
1835 ibt_mr_flags_t flags;
1836 uint_t sleep;
1837 int status;
1838
1839 /* Extract the flags field */
1840 flags = mr_attr->dmr_flags;
1841
1842 /*
1843 * Check the sleep flag. Ensure that it is consistent with the
1844 * current thread context (i.e. if we are currently in the interrupt
1845 * context, then we shouldn't be attempting to sleep).
1846 */
1847 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1848 if ((sleep == HERMON_SLEEP) &&
1849 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1850 status = IBT_INVALID_PARAM;
1851 goto mrcommon_fail;
1852 }
1853
1854 /* Increment the reference count on the protection domain (PD) */
1855 hermon_pd_refcnt_inc(pd);
1856
1857 /*
1858 * Allocate an MPT entry. This will be filled in with all the
1859 * necessary parameters to define the memory region. And then
1860 * ownership will be passed to the hardware in the final step
1861 * below. If we fail here, we must undo the protection domain
1862 * reference count.
1863 */
1864 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1865 if (status != DDI_SUCCESS) {
1866 status = IBT_INSUFF_RESOURCE;
1867 goto mrcommon_fail1;
1868 }
1869
1870 /*
1871 * Allocate the software structure for tracking the memory region (i.e.
1872 * the Hermon Memory Region handle). If we fail here, we must undo
1873 * the protection domain reference count and the previous resource
1874 * allocation.
1875 */
1876 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1877 if (status != DDI_SUCCESS) {
1878 status = IBT_INSUFF_RESOURCE;
1879 goto mrcommon_fail2;
1880 }
1881 mr = (hermon_mrhdl_t)rsrc->hr_addr;
1882 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1883 bzero(mr, sizeof (*mr));
1884
1885 /*
1886 * Setup and validate the memory region access flags. This means
1887 * translating the IBTF's enable flags into the access flags that
1888 * will be used in later operations.
1889 */
1890 mr->mr_accflag = 0;
1891 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1892 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1893 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1894 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1895 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1896 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1897 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1898 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1899 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1900 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1901
1902 /*
1903 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1904 * from a certain number of "constrained" bits (the least significant
1905 * bits) and some number of "unconstrained" bits. The constrained
1906 * bits must be set to the index of the entry in the MPT table, but
1907 * the unconstrained bits can be set to any value we wish. Note:
1908 * if no remote access is required, then the RKey value is not filled
1909 * in. Otherwise both Rkey and LKey are given the same value.
1910 */
1911 if (mpt)
1912 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1913
1914 /*
1915 * Fill in the MPT entry. This is the final step before passing
1916 * ownership of the MPT entry to the Hermon hardware. We use all of
1917 * the information collected/calculated above to fill in the
1918 * requisite portions of the MPT. Do this ONLY for DMPTs.
1919 */
1920 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1921
1922 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
1923 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1924 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1925 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1926 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1927 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1928 mpt_entry.lr = 1;
1929 mpt_entry.phys_addr = 1; /* critical bit for this */
1930 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1931
1932 mpt_entry.entity_sz = mr->mr_logmttpgsz;
1933 mpt_entry.mem_key = mr->mr_lkey;
1934 mpt_entry.pd = pd->pd_pdnum;
1935 mpt_entry.rem_acc_en = 0;
1936 mpt_entry.fast_reg_en = 0;
1937 mpt_entry.en_inval = 0;
1938 mpt_entry.lkey = 0;
1939 mpt_entry.win_cnt = 0;
1940
1941 mpt_entry.start_addr = mr_attr->dmr_paddr;
1942 mpt_entry.reg_win_len = mr_attr->dmr_len;
1943 if (mr_attr->dmr_len == 0)
1944 mpt_entry.len_b64 = 1; /* needed for 2^^64 length */
1945
1946 mpt_entry.mtt_addr_h = 0;
1947 mpt_entry.mtt_addr_l = 0;
1948
1949 /*
1950 * Write the MPT entry to hardware. Lastly, we pass ownership of
1951 * the entry to the hardware if needed. Note: in general, this
1952 * operation shouldn't fail. But if it does, we have to undo
1953 * everything we've done above before returning error.
1954 *
1955 * For Hermon, this routine (which is common to the contexts) will only
1956 * set the ownership if needed - the process of passing the context
1957 * itself to HW will take care of setting up the MPT (based on type
1958 * and index).
1959 */
1960
1961 mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */
1962 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1963 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1964 if (status != HERMON_CMD_SUCCESS) {
1965 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1966 status);
1967 if (status == HERMON_CMD_INVALID_STATUS) {
1968 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1969 }
1970 status = ibc_get_ci_failure(0);
1971 goto mrcommon_fail7;
1972 }
1973
1974 /*
1975 * Fill in the rest of the Hermon Memory Region handle. Having
1976 * successfully transferred ownership of the MPT, we can update the
1977 * following fields for use in further operations on the MR.
1978 */
1979 mr->mr_mttaddr = 0;
1980
1981 mr->mr_log2_pgsz = 0;
1982 mr->mr_mptrsrcp = mpt;
1983 mr->mr_mttrsrcp = NULL;
1984 mr->mr_pdhdl = pd;
1985 mr->mr_rsrcp = rsrc;
1986 mr->mr_is_umem = 0;
1987 mr->mr_is_fmr = 0;
1988 mr->mr_umemcookie = NULL;
1989 mr->mr_umem_cbfunc = NULL;
1990 mr->mr_umem_cbarg1 = NULL;
1991 mr->mr_umem_cbarg2 = NULL;
1992 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
1993 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
1994 mr->mr_mpt_type = HERMON_MPT_DMPT;
1995
1996 *mrhdl = mr;
1997
1998 return (DDI_SUCCESS);
1999
2000 /*
2001 * The following is cleanup for all possible failure cases in this routine
2002 */
2003 mrcommon_fail7:
2004 hermon_rsrc_free(state, &rsrc);
2005 mrcommon_fail2:
2006 hermon_rsrc_free(state, &mpt);
2007 mrcommon_fail1:
2008 hermon_pd_refcnt_dec(pd);
2009 mrcommon_fail:
2010 return (status);
2011 }
2012
2013 /*
2014 * hermon_mr_alloc_lkey()
2015 * Context: Can be called from base context.
2016 */
2017 int
hermon_mr_alloc_lkey(hermon_state_t * state,hermon_pdhdl_t pd,ibt_lkey_flags_t flags,uint_t nummtt,hermon_mrhdl_t * mrhdl)2018 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2019 ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2020 {
2021 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
2022 hermon_sw_refcnt_t *swrc_tmp;
2023 hermon_hw_dmpt_t mpt_entry;
2024 hermon_mrhdl_t mr;
2025 uint64_t mtt_addr;
2026 uint_t sleep;
2027 int status;
2028
2029 /* Increment the reference count on the protection domain (PD) */
2030 hermon_pd_refcnt_inc(pd);
2031
2032 sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2033
2034 /*
2035 * Allocate an MPT entry. This will be filled in with "some" of the
2036 * necessary parameters to define the memory region. And then
2037 * ownership will be passed to the hardware in the final step
2038 * below. If we fail here, we must undo the protection domain
2039 * reference count.
2040 *
2041 * The MTTs will get filled in when the FRWR is processed.
2042 */
2043 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2044 if (status != DDI_SUCCESS) {
2045 status = IBT_INSUFF_RESOURCE;
2046 goto alloclkey_fail1;
2047 }
2048
2049 /*
2050 * Allocate the software structure for tracking the memory region (i.e.
2051 * the Hermon Memory Region handle). If we fail here, we must undo
2052 * the protection domain reference count and the previous resource
2053 * allocation.
2054 */
2055 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2056 if (status != DDI_SUCCESS) {
2057 status = IBT_INSUFF_RESOURCE;
2058 goto alloclkey_fail2;
2059 }
2060 mr = (hermon_mrhdl_t)rsrc->hr_addr;
2061 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2062 bzero(mr, sizeof (*mr));
2063 mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2064
2065 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2066
2067 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2068 if (status != DDI_SUCCESS) {
2069 status = IBT_INSUFF_RESOURCE;
2070 goto alloclkey_fail3;
2071 }
2072 mr->mr_logmttpgsz = PAGESHIFT;
2073
2074 /*
2075 * Allocate MTT reference count (to track shared memory regions).
2076 * This reference count resource may never be used on the given
2077 * memory region, but if it is ever later registered as "shared"
2078 * memory region then this resource will be necessary. If we fail
2079 * here, we do pretty much the same as above to clean up.
2080 */
2081 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2082 &mtt_refcnt);
2083 if (status != DDI_SUCCESS) {
2084 status = IBT_INSUFF_RESOURCE;
2085 goto alloclkey_fail4;
2086 }
2087 mr->mr_mttrefcntp = mtt_refcnt;
2088 swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2089 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2090 HERMON_MTT_REFCNT_INIT(swrc_tmp);
2091
2092 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2093
2094 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2095 mpt_entry.status = HERMON_MPT_FREE;
2096 mpt_entry.lw = 1;
2097 mpt_entry.lr = 1;
2098 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2099 mpt_entry.entity_sz = mr->mr_logmttpgsz;
2100 mpt_entry.mem_key = mr->mr_lkey;
2101 mpt_entry.pd = pd->pd_pdnum;
2102 mpt_entry.fast_reg_en = 1;
2103 mpt_entry.rem_acc_en = 1;
2104 mpt_entry.en_inval = 1;
2105 if (flags & IBT_KEY_REMOTE) {
2106 mpt_entry.ren_inval = 1;
2107 }
2108 mpt_entry.mtt_size = nummtt;
2109 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
2110 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
2111
2112 /*
2113 * Write the MPT entry to hardware. Lastly, we pass ownership of
2114 * the entry to the hardware if needed. Note: in general, this
2115 * operation shouldn't fail. But if it does, we have to undo
2116 * everything we've done above before returning error.
2117 *
2118 * For Hermon, this routine (which is common to the contexts) will only
2119 * set the ownership if needed - the process of passing the context
2120 * itself to HW will take care of setting up the MPT (based on type
2121 * and index).
2122 */
2123 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2124 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2125 if (status != HERMON_CMD_SUCCESS) {
2126 cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2127 "failed: %08x\n", status);
2128 if (status == HERMON_CMD_INVALID_STATUS) {
2129 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2130 }
2131 status = ibc_get_ci_failure(0);
2132 goto alloclkey_fail5;
2133 }
2134
2135 /*
2136 * Fill in the rest of the Hermon Memory Region handle. Having
2137 * successfully transferred ownership of the MPT, we can update the
2138 * following fields for use in further operations on the MR.
2139 */
2140 mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2141 mr->mr_mttaddr = mtt_addr;
2142 mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2143 mr->mr_mptrsrcp = mpt;
2144 mr->mr_mttrsrcp = mtt;
2145 mr->mr_pdhdl = pd;
2146 mr->mr_rsrcp = rsrc;
2147 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2148 mr->mr_rkey = mr->mr_lkey;
2149 mr->mr_mpt_type = HERMON_MPT_DMPT;
2150
2151 *mrhdl = mr;
2152 return (DDI_SUCCESS);
2153
2154 alloclkey_fail5:
2155 hermon_rsrc_free(state, &mtt_refcnt);
2156 alloclkey_fail4:
2157 hermon_rsrc_free(state, &mtt);
2158 alloclkey_fail3:
2159 hermon_rsrc_free(state, &rsrc);
2160 alloclkey_fail2:
2161 hermon_rsrc_free(state, &mpt);
2162 alloclkey_fail1:
2163 hermon_pd_refcnt_dec(pd);
2164 return (status);
2165 }
2166
2167 /*
2168 * hermon_mr_fexch_mpt_init()
2169 * Context: Can be called from base context.
2170 *
2171 * This is the same as alloc_lkey, but not returning an mrhdl.
2172 */
2173 int
hermon_mr_fexch_mpt_init(hermon_state_t * state,hermon_pdhdl_t pd,uint32_t mpt_indx,uint_t nummtt,uint64_t mtt_addr,uint_t sleep)2174 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2175 uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2176 {
2177 hermon_hw_dmpt_t mpt_entry;
2178 int status;
2179
2180 /*
2181 * The MTTs will get filled in when the FRWR is processed.
2182 */
2183
2184 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2185 mpt_entry.status = HERMON_MPT_FREE;
2186 mpt_entry.lw = 1;
2187 mpt_entry.lr = 1;
2188 mpt_entry.rw = 1;
2189 mpt_entry.rr = 1;
2190 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2191 mpt_entry.entity_sz = PAGESHIFT;
2192 mpt_entry.mem_key = mpt_indx;
2193 mpt_entry.pd = pd->pd_pdnum;
2194 mpt_entry.fast_reg_en = 1;
2195 mpt_entry.rem_acc_en = 1;
2196 mpt_entry.en_inval = 1;
2197 mpt_entry.ren_inval = 1;
2198 mpt_entry.mtt_size = nummtt;
2199 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
2200 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
2201
2202 /*
2203 * Write the MPT entry to hardware. Lastly, we pass ownership of
2204 * the entry to the hardware if needed. Note: in general, this
2205 * operation shouldn't fail. But if it does, we have to undo
2206 * everything we've done above before returning error.
2207 *
2208 * For Hermon, this routine (which is common to the contexts) will only
2209 * set the ownership if needed - the process of passing the context
2210 * itself to HW will take care of setting up the MPT (based on type
2211 * and index).
2212 */
2213 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2214 sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2215 if (status != HERMON_CMD_SUCCESS) {
2216 cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2217 "failed: %08x\n", status);
2218 if (status == HERMON_CMD_INVALID_STATUS) {
2219 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2220 }
2221 status = ibc_get_ci_failure(0);
2222 return (status);
2223 }
2224 /* Increment the reference count on the protection domain (PD) */
2225 hermon_pd_refcnt_inc(pd);
2226
2227 return (DDI_SUCCESS);
2228 }
2229
2230 /*
2231 * hermon_mr_fexch_mpt_fini()
2232 * Context: Can be called from base context.
2233 *
2234 * This is the same as deregister_mr, without an mrhdl.
2235 */
2236 int
hermon_mr_fexch_mpt_fini(hermon_state_t * state,hermon_pdhdl_t pd,uint32_t mpt_indx,uint_t sleep)2237 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2238 uint32_t mpt_indx, uint_t sleep)
2239 {
2240 int status;
2241
2242 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2243 NULL, 0, mpt_indx, sleep);
2244 if (status != DDI_SUCCESS) {
2245 cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2246 "failed: %08x\n", status);
2247 if (status == HERMON_CMD_INVALID_STATUS) {
2248 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2249 }
2250 status = ibc_get_ci_failure(0);
2251 return (status);
2252 }
2253
2254 /* Decrement the reference count on the protection domain (PD) */
2255 hermon_pd_refcnt_dec(pd);
2256
2257 return (DDI_SUCCESS);
2258 }
2259
2260 /*
2261 * hermon_mr_mtt_bind()
2262 * Context: Can be called from interrupt or base context.
2263 */
2264 int
hermon_mr_mtt_bind(hermon_state_t * state,hermon_bind_info_t * bind,ddi_dma_handle_t bind_dmahdl,hermon_rsrc_t ** mtt,uint_t * mtt_pgsize_bits,uint_t is_buffer)2265 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2266 ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2267 uint_t is_buffer)
2268 {
2269 uint64_t nummtt;
2270 uint_t sleep;
2271 int status;
2272
2273 /*
2274 * Check the sleep flag. Ensure that it is consistent with the
2275 * current thread context (i.e. if we are currently in the interrupt
2276 * context, then we shouldn't be attempting to sleep).
2277 */
2278 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2279 HERMON_NOSLEEP : HERMON_SLEEP;
2280 if ((sleep == HERMON_SLEEP) &&
2281 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2282 status = IBT_INVALID_PARAM;
2283 goto mrmttbind_fail;
2284 }
2285
2286 /*
2287 * Bind the memory and determine the mapped addresses. This is
2288 * the first of two routines that do all the "heavy lifting" for
2289 * the Hermon memory registration routines. The hermon_mr_mem_bind()
2290 * routine takes the "bind" struct with all its fields filled
2291 * in and returns a list of DMA cookies (for the PCI mapped addresses
2292 * corresponding to the specified address region) which are used by
2293 * the hermon_mr_fast_mtt_write() routine below. If we fail here, we
2294 * must undo all the previous resource allocation (and PD reference
2295 * count).
2296 */
2297 status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2298 if (status != DDI_SUCCESS) {
2299 status = IBT_INSUFF_RESOURCE;
2300 goto mrmttbind_fail;
2301 }
2302
2303 /*
2304 * Determine number of pages spanned. This routine uses the
2305 * information in the "bind" struct to determine the required
2306 * number of MTT entries needed (and returns the suggested page size -
2307 * as a "power-of-2" - for each MTT entry).
2308 */
2309 nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2310
2311 /*
2312 * Allocate the MTT entries. Use the calculations performed above to
2313 * allocate the required number of MTT entries. If we fail here, we
2314 * must not only undo all the previous resource allocation (and PD
2315 * reference count), but we must also unbind the memory.
2316 */
2317 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2318 if (status != DDI_SUCCESS) {
2319 status = IBT_INSUFF_RESOURCE;
2320 goto mrmttbind_fail2;
2321 }
2322
2323 /*
2324 * Write the mapped addresses into the MTT entries. This is part two
2325 * of the "heavy lifting" routines that we talked about above. Note:
2326 * we pass the suggested page size from the earlier operation here.
2327 * And if we fail here, we again do pretty much the same huge clean up.
2328 */
2329 status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2330 if (status != DDI_SUCCESS) {
2331 /*
2332 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2333 * only if it detects a HW error during DMA.
2334 */
2335 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2336 status = ibc_get_ci_failure(0);
2337 goto mrmttbind_fail3;
2338 }
2339 return (DDI_SUCCESS);
2340
2341 /*
2342 * The following is cleanup for all possible failure cases in this routine
2343 */
2344 mrmttbind_fail3:
2345 hermon_rsrc_free(state, mtt);
2346 mrmttbind_fail2:
2347 hermon_mr_mem_unbind(state, bind);
2348 mrmttbind_fail:
2349 return (status);
2350 }
2351
2352
2353 /*
2354 * hermon_mr_mtt_unbind()
2355 * Context: Can be called from interrupt or base context.
2356 */
2357 int
hermon_mr_mtt_unbind(hermon_state_t * state,hermon_bind_info_t * bind,hermon_rsrc_t * mtt)2358 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2359 hermon_rsrc_t *mtt)
2360 {
2361 /*
2362 * Free up the MTT entries and unbind the memory. Here, as above, we
2363 * attempt to free these resources only if it is appropriate to do so.
2364 */
2365 hermon_mr_mem_unbind(state, bind);
2366 hermon_rsrc_free(state, &mtt);
2367
2368 return (DDI_SUCCESS);
2369 }
2370
2371
/*
 * hermon_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 *
 * Common routine backing memory region reregistration.  Temporarily
 * reclaims ownership of the region's MPT entry from the hardware,
 * applies the requested changes (protection domain, access flags,
 * and/or translation, per "flags"), and hands the updated entry back
 * to the hardware.  On success, *mrhdl_new is set (to the same handle
 * as "mr") and DDI_SUCCESS is returned.  On most failure paths the
 * region is deregistered entirely before an error is returned; the
 * one exception is a region with bound memory windows, which returns
 * IBT_MR_IN_USE and leaves the region intact.
 */
static int
hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	hermon_pdhdl_t		pd_to_use;
	hermon_hw_dmpt_t	mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.
	 */
	if (mr->mr_is_umem) {
		status = IBT_MR_HDL_INVALID;
		goto mrrereg_fail;
	}

	/* mr_lock is held across both ownership commands below */
	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Hermon Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows that we fail returning an "in use" error code.
	 * Otherwise, this is an unexpected error and we deregister the
	 * memory region and return error.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == HERMON_CMD_REG_BOUND) {
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_PD_HDL_INVALID;
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_MR_ACCESS_REQ_INVALID;
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the region's current access flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note:  If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to hermon_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (hermon_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			goto mrrereg_fail;
		}
		/* bindinfo was updated by the xlat helper above */
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use   = mr->mr_bindinfo.bi_len;
	} else {
		/* Translation unchanged: reuse current MTT address/range */
		mtt_addr_to_use = mr->mr_mttaddr;
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use   = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	} else
		mr->mr_rkey = 0;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status	= HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind	= (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic	= (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw		= (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr		= (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw		= (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr		= 1;
	mpt_entry.phys_addr	= 0;
	mpt_entry.reg_win	= HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd_to_use->pd_pdnum;

	mpt_entry.start_addr	= vaddr_to_use;
	mpt_entry.reg_win_len	= len_to_use;
	mpt_entry.mtt_addr_h	= mtt_addr_to_use >> 32;
	mpt_entry.mtt_addr_l	= mtt_addr_to_use >> 3;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (hermon_mr_deregister(state, &mr,
		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister memory "
			    "region");
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		hermon_pd_refcnt_dec(mr->mr_pdhdl);
		hermon_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Hermon Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = NULL;
	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

mrrereg_fail:
	/*
	 * NOTE(review): on the paths that called hermon_mr_deregister()
	 * above, "mr" has presumably been freed by that call and must not
	 * be referenced again here — confirm against hermon_mr_deregister().
	 */
	return (status);
}
2685
2686
2687 /*
2688 * hermon_mr_rereg_xlat_helper
2689 * Context: Can be called from interrupt or base context.
2690 * Note: This routine expects the "mr_lock" to be held when it
2691 * is called. Upon returning failure, this routine passes information
2692 * about what "dereg_level" should be passed to hermon_mr_deregister().
2693 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	hermon_rsrc_t		*mtt, *mtt_refcnt;
	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;

	/*
	 * Rebuild the MTT address translation for a region that is being
	 * reregistered.  On success the new MTT address (suitable for the
	 * caller's updated MPT entry) is returned through "mtt_addr".  On
	 * failure, "dereg_level" tells the caller how much cleanup remains
	 * for hermon_mr_deregister() to perform.
	 */
	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = HERMON_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine. This will ensure that all current resources get
		 * properly freed up. Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in hermon_mr_reregister())
		 */
		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

		status = IBT_MR_LEN_INVALID;
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note:  If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			/* Clear bi_free_dmahdl so unbind keeps the handle */
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note:  When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt    = mr->mr_mttrsrcp;
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/*
			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
			 * only if it detects a HW error during DMA.
			 */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
			status = ibc_get_ci_failure(0);
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below. Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared: old binding must remain untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note:  When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
		    sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note:  This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine. This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * hermon_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				hermon_mr_mem_unbind(state, bind);
				hermon_rsrc_free(state, &mtt);
				*dereg_level =
				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				status = IBT_INSUFF_RESOURCE;
				goto mrrereghelp_fail;
			}
			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			HERMON_MTT_REFCNT_INIT(swrc_new);
		} else {
			/* Not shared: keep using the existing refcnt rsrc */
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (HERMON_MTT_IS_SHARED(swrc_old)) {
				hermon_rsrc_free(state, &mtt_refcnt);
			}
			hermon_mr_mem_unbind(state, bind);
			hermon_rsrc_free(state, &mtt);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp   = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (hermon_mr_reregister) in
	 * the updated MPT entry.  "mtt" is either the reused resource (first
	 * case above) or the newly allocated one (second case).
	 */
	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

	return (DDI_SUCCESS);

mrrereghelp_fail:
	return (status);
}
3062
3063
3064 /*
3065 * hermon_mr_nummtt_needed()
3066 * Context: Can be called from interrupt or base context.
3067 */
3068 /* ARGSUSED */
3069 static uint64_t
hermon_mr_nummtt_needed(hermon_state_t * state,hermon_bind_info_t * bind,uint_t * mtt_pgsize_bits)3070 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3071 uint_t *mtt_pgsize_bits)
3072 {
3073 uint64_t pg_offset_mask;
3074 uint64_t pg_offset, tmp_length;
3075
3076 /*
3077 * For now we specify the page size as 8Kb (the default page size for
3078 * the sun4u architecture), or 4Kb for x86. Figure out optimal page
3079 * size by examining the dmacookies
3080 */
3081 *mtt_pgsize_bits = PAGESHIFT;
3082
3083 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3084 pg_offset = bind->bi_addr & pg_offset_mask;
3085 tmp_length = pg_offset + (bind->bi_len - 1);
3086 return ((tmp_length >> *mtt_pgsize_bits) + 1);
3087 }
3088
3089
3090 /*
3091 * hermon_mr_mem_bind()
3092 * Context: Can be called from interrupt or base context.
3093 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
	    bind->bi_type == HERMON_BINDHDL_BUF ||
	    bind->bi_type == HERMON_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 * A non-NULL "dmahdl" means the caller wants us to reuse an
	 * existing handle (see the "else" below), in which case the
	 * attributes are already established and bi_free_dmahdl stays 0.
	 */
	if (dmahdl == NULL) {
		hermon_dma_attr_init(state, &dma_attr);
#ifdef	__sparc
		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif

		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
		if (is_buffer) {
			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
				/*
				 * Relaxed ordering is enabled separately for
				 * kernel (non-UBUF) and user (UBUF) memory
				 * via the hermon_*_data_ro tunables.
				 */
				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
				    (hermon_kernel_data_ro ==
				    HERMON_RO_ENABLED)) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
				    (hermon_user_data_ro ==
				    HERMON_RO_ENABLED))) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
			}
		}

		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		/* We own this handle, so mark it to be freed at unbind */
		bind->bi_free_dmahdl = 1;

	} else  {
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}


	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);

	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */

		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}
	if (status != DDI_DMA_MAPPED) {
		/* Only free the handle if we allocated it above */
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}
3183
3184
3185 /*
3186 * hermon_mr_mem_unbind()
3187 * Context: Can be called from interrupt or base context.
3188 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int	status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	/* there is nothing to unbind for alloc_lkey */
	if (bind->bi_type == HERMON_BINDHDL_LKEY)
		return;

	/*
	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
	 * not to free it again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/*
	 * Free up the DMA handle only if this binding allocated it
	 * (callers clear bi_free_dmahdl when they intend to reuse the
	 * handle for a subsequent rebind).
	 */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}
3232
3233
3234 /*
3235 * hermon_mr_fast_mtt_write()
3236 * Context: Can be called from interrupt or base context.
3237 */
3238 static int
hermon_mr_fast_mtt_write(hermon_state_t * state,hermon_rsrc_t * mtt,hermon_bind_info_t * bind,uint32_t mtt_pgsize_bits)3239 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3240 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3241 {
3242 hermon_icm_table_t *icm_table;
3243 hermon_dma_info_t *dma_info;
3244 uint32_t index1, index2, rindx;
3245 ddi_dma_cookie_t dmacookie;
3246 uint_t cookie_cnt;
3247 uint64_t *mtt_table;
3248 uint64_t mtt_entry;
3249 uint64_t addr, endaddr;
3250 uint64_t pagesize;
3251 offset_t i, start;
3252 uint_t per_span;
3253 int sync_needed;
3254
3255 /*
3256 * XXX According to the PRM, we are to use the WRITE_MTT
3257 * command to write out MTTs. Tavor does not do this,
3258 * instead taking advantage of direct access to the MTTs,
3259 * and knowledge that Mellanox FMR relies on our ability
3260 * to write directly to the MTTs without any further
3261 * notification to the firmware. Likewise, we will choose
3262 * to not use the WRITE_MTT command, but to simply write
3263 * out the MTTs.
3264 */
3265
3266 /* Calculate page size from the suggested value passed in */
3267 pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3268
3269 /* Walk the "cookie list" and fill in the MTT table entries */
3270 dmacookie = bind->bi_dmacookie;
3271 cookie_cnt = bind->bi_cookiecnt;
3272
3273 icm_table = &state->hs_icm[HERMON_MTT];
3274 rindx = mtt->hr_indx;
3275 hermon_index(index1, index2, rindx, icm_table, i);
3276 start = i;
3277
3278 per_span = icm_table->span;
3279 dma_info = icm_table->icm_dma[index1] + index2;
3280 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3281
3282 sync_needed = 0;
3283 while (cookie_cnt-- > 0) {
3284 addr = dmacookie.dmac_laddress;
3285 endaddr = addr + (dmacookie.dmac_size - 1);
3286 addr = addr & ~((uint64_t)pagesize - 1);
3287
3288 while (addr <= endaddr) {
3289
3290 /*
3291 * Fill in the mapped addresses (calculated above) and
3292 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3293 */
3294 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3295 mtt_table[i] = htonll(mtt_entry);
3296 i++;
3297 rindx++;
3298
3299 if (i == per_span) {
3300
3301 (void) ddi_dma_sync(dma_info->dma_hdl,
3302 start * sizeof (hermon_hw_mtt_t),
3303 (i - start) * sizeof (hermon_hw_mtt_t),
3304 DDI_DMA_SYNC_FORDEV);
3305
3306 if ((addr + pagesize > endaddr) &&
3307 (cookie_cnt == 0))
3308 return (DDI_SUCCESS);
3309
3310 hermon_index(index1, index2, rindx, icm_table,
3311 i);
3312 start = i * sizeof (hermon_hw_mtt_t);
3313 dma_info = icm_table->icm_dma[index1] + index2;
3314 mtt_table =
3315 (uint64_t *)(uintptr_t)dma_info->vaddr;
3316
3317 sync_needed = 0;
3318 } else {
3319 sync_needed = 1;
3320 }
3321
3322 addr += pagesize;
3323 if (addr == 0) {
3324 static int do_once = 1;
3325 _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
3326 do_once))
3327 if (do_once) {
3328 do_once = 0;
3329 cmn_err(CE_NOTE, "probable error in "
3330 "dma_cookie address from caller\n");
3331 }
3332 break;
3333 }
3334 }
3335
3336 /*
3337 * When we've reached the end of the current DMA cookie,
3338 * jump to the next cookie (if there are more)
3339 */
3340 if (cookie_cnt != 0) {
3341 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3342 }
3343 }
3344
3345 /* done all the cookies, now sync the memory for the device */
3346 if (sync_needed)
3347 (void) ddi_dma_sync(dma_info->dma_hdl,
3348 start * sizeof (hermon_hw_mtt_t),
3349 (i - start) * sizeof (hermon_hw_mtt_t),
3350 DDI_DMA_SYNC_FORDEV);
3351
3352 return (DDI_SUCCESS);
3353 }
3354
3355 /*
3356 * hermon_mr_fast_mtt_write_fmr()
3357 * Context: Can be called from interrupt or base context.
3358 */
/* ARGSUSED */
static int
hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	uint64_t		*mtt_table;
	offset_t		i, j;
	uint_t			per_span;

	/*
	 * FMR variant of hermon_mr_fast_mtt_write(): copy the caller's
	 * physical address list (pmr_addr_list) directly into the ICM-backed
	 * MTT table, crossing ICM spans as needed.
	 *
	 * NOTE(review): unlike hermon_mr_fast_mtt_write(), the entries here
	 * are neither passed through htonll() nor ORed with
	 * HERMON_MTT_ENTRY_PRESENT, and no ddi_dma_sync() is issued —
	 * presumably the FMR caller pre-formats p_laddr and handles
	 * visibility; confirm against the FMR registration path.
	 */
	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	per_span   = icm_table->span;
	dma_info   = icm_table->icm_dma[index1] + index2;
	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;

	/*
	 * Fill in the MTT table entries
	 */
	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
		i++;
		rindx++;
		/* Reached the end of this ICM span; advance to the next */
		if (i == per_span) {
			hermon_index(index1, index2, rindx, icm_table, i);
			dma_info = icm_table->icm_dma[index1] + index2;
			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
		}
	}

	return (DDI_SUCCESS);
}
3394
3395
3396 /*
3397 * hermon_mtt_refcnt_inc()
3398 * Context: Can be called from interrupt or base context.
3399 */
3400 static uint_t
hermon_mtt_refcnt_inc(hermon_rsrc_t * rsrc)3401 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3402 {
3403 hermon_sw_refcnt_t *rc;
3404
3405 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3406 return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3407 }
3408
3409
3410 /*
3411 * hermon_mtt_refcnt_dec()
3412 * Context: Can be called from interrupt or base context.
3413 */
3414 static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t * rsrc)3415 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3416 {
3417 hermon_sw_refcnt_t *rc;
3418
3419 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3420 return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3421 }
3422