1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_misc.c
28 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection
29 * Domain, and port-related operations
30 *
31 * Implements all the routines necessary for allocating, freeing, querying
32 * and modifying Address Handles and Protection Domains. Also implements
33 * all the routines necessary for adding and removing Queue Pairs to/from
34 * Multicast Groups. Lastly, it implements the routines necessary for
35 * port-related query and modify operations.
36 */
37
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/bitmap.h>
44 #include <sys/sysmacros.h>
45
46 #include <sys/ib/adapters/hermon/hermon.h>
47
48 extern int hermon_rdma_debug;
49 int hermon_fmr_verbose = 0;
50
51 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
52 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
54 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
57 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
60 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
61 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
62 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
63 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
64 hermon_hw_mcg_t *mcg_entry, uint_t indx);
65 static int hermon_mgid_is_valid(ib_gid_t gid);
66 static int hermon_mlid_is_valid(ib_lid_t lid);
67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool);
68
69
70 #define HERMON_MAX_DBR_PAGES_PER_USER 64
71 #define HERMON_DBR_KEY(index, page) \
72 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
73
74 static hermon_udbr_page_t *
hermon_dbr_new_user_page(hermon_state_t * state,uint_t index,uint_t page)75 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
76 uint_t page)
77 {
78 hermon_udbr_page_t *pagep;
79 ddi_dma_attr_t dma_attr;
80 uint_t cookiecnt;
81 int status;
82 hermon_umap_db_entry_t *umapdb;
83 ulong_t pagesize = PAGESIZE;
84
85 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
86 pagep->upg_index = page;
87 pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t);
88
89 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
90 pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8,
91 KM_SLEEP);
92 pagep->upg_kvaddr = ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP,
93 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
94
95 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
96 pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
97
98 hermon_dma_attr_init(state, &dma_attr);
99 #ifdef __sparc
100 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
101 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
102 #endif
103 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
104 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
105 if (status != DDI_SUCCESS) {
106 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: "
107 "ddi_dma_buf_bind_handle failed: %d", status);
108 return (NULL);
109 }
110 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
111 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
112 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
113 if (status != DDI_SUCCESS) {
114 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
115 "ddi_dma_buf_bind_handle failed: %d", status);
116 ddi_dma_free_handle(&pagep->upg_dmahdl);
117 return (NULL);
118 }
119 ASSERT(cookiecnt == 1);
120
121 /* create db entry for mmap */
122 umapdb = hermon_umap_db_alloc(state->hs_instance,
123 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
124 (uint64_t)(uintptr_t)pagep);
125 hermon_umap_db_add(umapdb);
126 return (pagep);
127 }
128
129
/*
 * hermon_user_dbr_alloc()
 *    Allocate one doorbell record (DBr) from the userland DBr pages that
 *    belong to UAR page "index", growing the page list by one page when
 *    every existing page is full.  Returns the kernel virtual address
 *    ("vdbr"), DMA address ("pdbr") and the mmap(2) offset ("mapoffset")
 *    of the allocated DBr.  "acchdl" is unused here (see ARGSUSED).
 *    Called (with no locks held) from hermon_dbr_alloc() for non-kernel
 *    UAR indices.
 */
/*ARGSUSED*/
static int
hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
    ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
    uint64_t *mapoffset)
{
	hermon_user_dbr_t *udbr;
	hermon_udbr_page_t *pagep;
	uint_t next_page;
	int dbr_index;
	int i1, i2, i3, last;
	uint64_t u64, mask;

	mutex_enter(&state->hs_dbr_lock);
	/* Look up the per-UAR-index list; create it on first use. */
	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
		if (udbr->udbr_index == index)
			break;
	if (udbr == NULL) {
		udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
		udbr->udbr_link = state->hs_user_dbr;
		state->hs_user_dbr = udbr;
		udbr->udbr_index = index;
		udbr->udbr_pagep = NULL;
	}
	pagep = udbr->udbr_pagep;
	/* Page number to use if a brand-new page must be allocated below */
	next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
	/* Walk the page list looking for a page with a free DBr */
	while (pagep != NULL)
		if (pagep->upg_nfree > 0)
			break;
		else
			pagep = pagep->upg_link;
	if (pagep == NULL) {
		/* All pages full (or none yet): grow by one page */
		pagep = hermon_dbr_new_user_page(state, index, next_page);
		if (pagep == NULL) {
			mutex_exit(&state->hs_dbr_lock);
			return (DDI_FAILURE);
		}
		pagep->upg_link = udbr->udbr_pagep;
		udbr->udbr_pagep = pagep;
	}

	/* Since nfree > 0, we're assured the loops below will succeed */

	/* First, find a 64-bit (not ~0) that has a free dbr */
	last = PAGESIZE / sizeof (uint64_t) / 64;
	mask = ~0ull;
	for (i1 = 0; i1 < last; i1++)
		if ((pagep->upg_free[i1] & mask) != mask)
			break;
	u64 = pagep->upg_free[i1];

	/* Second, find a byte (not 0xff) that has a free dbr */
	last = sizeof (uint64_t) / sizeof (uint8_t);
	for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
		if ((u64 & mask) != mask)
			break;

	/* Third, find a bit that is free (0) */
	for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
		if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
			break;

	/* Mark it as allocated */
	pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));

	/* Convert the (word i1, byte i2, bit i3) coordinates to an index */
	dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
	pagep->upg_nfree--;
	((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0; /* clear dbr */
	/* Encode (UAR index, page #) and resource type into the mmap offset */
	*mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
	*vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
	    dbr_index);
	*pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
	    sizeof (uint64_t);

	mutex_exit(&state->hs_dbr_lock);
	return (DDI_SUCCESS);
}
208
209 static void
hermon_user_dbr_free(hermon_state_t * state,uint_t index,hermon_dbr_t * record)210 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
211 {
212 hermon_user_dbr_t *udbr;
213 hermon_udbr_page_t *pagep;
214 caddr_t kvaddr;
215 uint_t dbr_index;
216 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t);
217 int i1, i2;
218
219 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
220 kvaddr = (caddr_t)record - dbr_index;
221 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
222
223 mutex_enter(&state->hs_dbr_lock);
224 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
225 if (udbr->udbr_index == index)
226 break;
227 if (udbr == NULL) {
228 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
229 "found for index %x", index);
230 mutex_exit(&state->hs_dbr_lock);
231 return;
232 }
233 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
234 if (pagep->upg_kvaddr == kvaddr)
235 break;
236 if (pagep == NULL) {
237 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
238 " found for index %x, kvaddr %p, DBR index %x",
239 index, kvaddr, dbr_index);
240 mutex_exit(&state->hs_dbr_lock);
241 return;
242 }
243 if (pagep->upg_nfree >= max_free) {
244 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
245 "UCE index %x, DBR index %x", index, dbr_index);
246 mutex_exit(&state->hs_dbr_lock);
247 return;
248 }
249 ASSERT(dbr_index < max_free);
250 i1 = dbr_index / 64;
251 i2 = dbr_index % 64;
252 ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
253 pagep->upg_free[i1] &= ~(1ul << i2);
254 pagep->upg_nfree++;
255 mutex_exit(&state->hs_dbr_lock);
256 }
257
258 /*
259 * hermon_dbr_page_alloc()
260 * first page allocation - called from attach or open
261 * in this case, we want exactly one page per call, and aligned on a
262 * page - and may need to be mapped to the user for access
263 */
264 int
hermon_dbr_page_alloc(hermon_state_t * state,hermon_dbr_info_t ** dinfo)265 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
266 {
267 int status;
268 ddi_dma_handle_t dma_hdl;
269 ddi_acc_handle_t acc_hdl;
270 ddi_dma_attr_t dma_attr;
271 ddi_dma_cookie_t cookie;
272 uint_t cookie_cnt;
273 int i;
274 hermon_dbr_info_t *info;
275 caddr_t dmaaddr;
276 uint64_t dmalen;
277 ulong_t pagesize = PAGESIZE;
278
279 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
280
281 /*
282 * Initialize many of the default DMA attributes. Then set additional
283 * alignment restrictions if necessary for the dbr memory, meaning
284 * page aligned. Also use the configured value for IOMMU bypass
285 */
286 hermon_dma_attr_init(state, &dma_attr);
287 dma_attr.dma_attr_align = pagesize;
288 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */
289 #ifdef __sparc
290 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
291 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
292 #endif
293
294 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
295 DDI_DMA_SLEEP, NULL, &dma_hdl);
296 if (status != DDI_SUCCESS) {
297 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
298 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
299 return (DDI_FAILURE);
300 }
301
302 status = ddi_dma_mem_alloc(dma_hdl, pagesize,
303 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
304 NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
305 if (status != DDI_SUCCESS) {
306 ddi_dma_free_handle(&dma_hdl);
307 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
308 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
309 return (DDI_FAILURE);
310 }
311
312 /* this memory won't be IB registered, so do the bind here */
313 status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
314 dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
315 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
316 if (status != DDI_SUCCESS) {
317 ddi_dma_mem_free(&acc_hdl);
318 ddi_dma_free_handle(&dma_hdl);
319 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
320 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
321 status);
322 return (DDI_FAILURE);
323 }
324 *dinfo = info; /* Pass back the pointer */
325
326 /* init the info structure with returned info */
327 info->dbr_dmahdl = dma_hdl;
328 info->dbr_acchdl = acc_hdl;
329 info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr;
330 info->dbr_link = NULL;
331 /* extract the phys addr from the cookie */
332 info->dbr_paddr = cookie.dmac_laddress;
333 info->dbr_firstfree = 0;
334 info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
335 /* link all DBrs onto the free list */
336 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
337 info->dbr_page[i] = i + 1;
338 }
339
340 return (DDI_SUCCESS);
341 }
342
343
344 /*
345 * hermon_dbr_alloc()
346 * DBr record allocation - called from alloc cq/qp/srq
347 * will check for available dbrs in current
348 * page - if needed it will allocate another and link them
349 */
350
351 int
hermon_dbr_alloc(hermon_state_t * state,uint_t index,ddi_acc_handle_t * acchdl,hermon_dbr_t ** vdbr,uint64_t * pdbr,uint64_t * mapoffset)352 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
353 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
354 {
355 hermon_dbr_t *record = NULL;
356 hermon_dbr_info_t *info = NULL;
357 uint32_t idx;
358 int status;
359
360 if (index != state->hs_kernel_uar_index)
361 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
362 mapoffset));
363
364 mutex_enter(&state->hs_dbr_lock);
365 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
366 if (info->dbr_nfree != 0)
367 break; /* found a page w/ one available */
368
369 if (info == NULL) { /* did NOT find a page with one available */
370 status = hermon_dbr_page_alloc(state, &info);
371 if (status != DDI_SUCCESS) {
372 /* do error handling */
373 mutex_exit(&state->hs_dbr_lock);
374 return (DDI_FAILURE);
375 }
376 /* got a new page, so link it in. */
377 info->dbr_link = state->hs_kern_dbr;
378 state->hs_kern_dbr = info;
379 }
380 idx = info->dbr_firstfree;
381 record = info->dbr_page + idx;
382 info->dbr_firstfree = *record;
383 info->dbr_nfree--;
384 *record = 0;
385
386 *acchdl = info->dbr_acchdl;
387 *vdbr = record;
388 *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
389 mutex_exit(&state->hs_dbr_lock);
390 return (DDI_SUCCESS);
391 }
392
393 /*
394 * hermon_dbr_free()
395 * DBr record deallocation - called from free cq/qp
396 * will update the counter in the header, and invalidate
397 * the dbr, but will NEVER free pages of dbrs - small
398 * price to pay, but userland access never will anyway
399 */
400 void
hermon_dbr_free(hermon_state_t * state,uint_t indx,hermon_dbr_t * record)401 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
402 {
403 hermon_dbr_t *page;
404 hermon_dbr_info_t *info;
405
406 if (indx != state->hs_kernel_uar_index) {
407 hermon_user_dbr_free(state, indx, record);
408 return;
409 }
410 page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
411 mutex_enter(&state->hs_dbr_lock);
412 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
413 if (info->dbr_page == page)
414 break;
415 ASSERT(info != NULL);
416 *record = info->dbr_firstfree;
417 info->dbr_firstfree = record - info->dbr_page;
418 info->dbr_nfree++;
419 mutex_exit(&state->hs_dbr_lock);
420 }
421
422 /*
423 * hermon_dbr_kern_free()
424 * Context: Can be called only from detach context.
425 *
426 * Free all kernel dbr pages. This includes the freeing of all the dma
427 * resources acquired during the allocation of the pages.
428 *
429 * Also, free all the user dbr pages.
430 */
431 void
hermon_dbr_kern_free(hermon_state_t * state)432 hermon_dbr_kern_free(hermon_state_t *state)
433 {
434 hermon_dbr_info_t *info, *link;
435 hermon_user_dbr_t *udbr, *next;
436 hermon_udbr_page_t *pagep, *nextp;
437 hermon_umap_db_entry_t *umapdb;
438 int instance, status;
439 uint64_t value;
440 extern hermon_umap_db_t hermon_userland_rsrc_db;
441
442 mutex_enter(&state->hs_dbr_lock);
443 for (info = state->hs_kern_dbr; info != NULL; info = link) {
444 (void) ddi_dma_unbind_handle(info->dbr_dmahdl);
445 ddi_dma_mem_free(&info->dbr_acchdl); /* free page */
446 ddi_dma_free_handle(&info->dbr_dmahdl);
447 link = info->dbr_link;
448 kmem_free(info, sizeof (hermon_dbr_info_t));
449 }
450
451 udbr = state->hs_user_dbr;
452 instance = state->hs_instance;
453 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
454 while (udbr != NULL) {
455 pagep = udbr->udbr_pagep;
456 while (pagep != NULL) {
457 /* probably need to remove "db" */
458 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
459 ddi_dma_free_handle(&pagep->upg_dmahdl);
460 freerbuf(pagep->upg_buf);
461 ddi_umem_free(pagep->upg_umemcookie);
462 status = hermon_umap_db_find_nolock(instance,
463 HERMON_DBR_KEY(udbr->udbr_index,
464 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
465 &value, HERMON_UMAP_DB_REMOVE, &umapdb);
466 if (status == DDI_SUCCESS)
467 hermon_umap_db_free(umapdb);
468 kmem_free(pagep->upg_free,
469 PAGESIZE / sizeof (hermon_dbr_t) / 8);
470 nextp = pagep->upg_link;
471 kmem_free(pagep, sizeof (*pagep));
472 pagep = nextp;
473 }
474 next = udbr->udbr_link;
475 kmem_free(udbr, sizeof (*udbr));
476 udbr = next;
477 }
478 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
479 mutex_exit(&state->hs_dbr_lock);
480 }
481
482 /*
483 * hermon_ah_alloc()
484 * Context: Can be called only from user or kernel context.
485 */
486 int
hermon_ah_alloc(hermon_state_t * state,hermon_pdhdl_t pd,ibt_adds_vect_t * attr_p,hermon_ahhdl_t * ahhdl,uint_t sleepflag)487 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
488 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
489 {
490 hermon_rsrc_t *rsrc;
491 hermon_hw_udav_t *udav;
492 hermon_ahhdl_t ah;
493 int status;
494
495 /*
496 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
497 * indicate that we wish to allocate an "invalid" (i.e. empty)
498 * address handle XXX
499 */
500
501 /* Validate that specified port number is legal */
502 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
503 return (IBT_HCA_PORT_INVALID);
504 }
505
506 /*
507 * Allocate the software structure for tracking the address handle
508 * (i.e. the Hermon Address Handle struct).
509 */
510 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
511 if (status != DDI_SUCCESS) {
512 return (IBT_INSUFF_RESOURCE);
513 }
514 ah = (hermon_ahhdl_t)rsrc->hr_addr;
515 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
516
517 /* Increment the reference count on the protection domain (PD) */
518 hermon_pd_refcnt_inc(pd);
519
520 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
521 KM_SLEEP);
522 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
523
524 /*
525 * Fill in the UDAV data. We first zero out the UDAV, then populate
526 * it by then calling hermon_set_addr_path() to fill in the common
527 * portions that can be pulled from the "ibt_adds_vect_t" passed in
528 */
529 status = hermon_set_addr_path(state, attr_p,
530 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
531 if (status != DDI_SUCCESS) {
532 hermon_pd_refcnt_dec(pd);
533 hermon_rsrc_free(state, &rsrc);
534 return (status);
535 }
536 udav->pd = pd->pd_pdnum;
537 udav->sl = attr_p->av_srvl;
538
539 /*
540 * Fill in the rest of the Hermon Address Handle struct.
541 *
542 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
543 * here because we may need to return it later to the IBTF (as a
544 * result of a subsequent query operation). Unlike the other UDAV
545 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
546 * The reason for this is described in hermon_set_addr_path().
547 */
548 ah->ah_rsrcp = rsrc;
549 ah->ah_pdhdl = pd;
550 ah->ah_udav = udav;
551 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
552 *ahhdl = ah;
553
554 return (DDI_SUCCESS);
555 }
556
557
558 /*
559 * hermon_ah_free()
560 * Context: Can be called only from user or kernel context.
561 */
562 /* ARGSUSED */
563 int
hermon_ah_free(hermon_state_t * state,hermon_ahhdl_t * ahhdl,uint_t sleepflag)564 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
565 {
566 hermon_rsrc_t *rsrc;
567 hermon_pdhdl_t pd;
568 hermon_ahhdl_t ah;
569
570 /*
571 * Pull all the necessary information from the Hermon Address Handle
572 * struct. This is necessary here because the resource for the
573 * AH is going to be freed up as part of this operation.
574 */
575 ah = *ahhdl;
576 mutex_enter(&ah->ah_lock);
577 rsrc = ah->ah_rsrcp;
578 pd = ah->ah_pdhdl;
579 mutex_exit(&ah->ah_lock);
580 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
581
582 /* Free the UDAV memory */
583 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
584
585 /* Decrement the reference count on the protection domain (PD) */
586 hermon_pd_refcnt_dec(pd);
587
588 /* Free the Hermon Address Handle structure */
589 hermon_rsrc_free(state, &rsrc);
590
591 /* Set the ahhdl pointer to NULL and return success */
592 *ahhdl = NULL;
593
594 return (DDI_SUCCESS);
595 }
596
597
598 /*
599 * hermon_ah_query()
600 * Context: Can be called from interrupt or base context.
601 */
602 /* ARGSUSED */
603 int
hermon_ah_query(hermon_state_t * state,hermon_ahhdl_t ah,hermon_pdhdl_t * pd,ibt_adds_vect_t * attr_p)604 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
605 ibt_adds_vect_t *attr_p)
606 {
607 mutex_enter(&ah->ah_lock);
608 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
609
610 /*
611 * Pull the PD and UDAV from the Hermon Address Handle structure
612 */
613 *pd = ah->ah_pdhdl;
614
615 /*
616 * Fill in "ibt_adds_vect_t". We call hermon_get_addr_path() to fill
617 * the common portions that can be pulled from the UDAV we pass in.
618 *
619 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
620 * "ah_save_guid" field we have previously saved away. The reason
621 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
622 */
623 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
624 attr_p, HERMON_ADDRPATH_UDAV);
625
626 attr_p->av_dgid.gid_guid = ah->ah_save_guid;
627
628 mutex_exit(&ah->ah_lock);
629 return (DDI_SUCCESS);
630 }
631
632
633 /*
634 * hermon_ah_modify()
635 * Context: Can be called from interrupt or base context.
636 */
637 /* ARGSUSED */
638 int
hermon_ah_modify(hermon_state_t * state,hermon_ahhdl_t ah,ibt_adds_vect_t * attr_p)639 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
640 ibt_adds_vect_t *attr_p)
641 {
642 hermon_hw_udav_t old_udav;
643 uint64_t data_old;
644 int status, size, i;
645
646 /* Validate that specified port number is legal */
647 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
648 return (IBT_HCA_PORT_INVALID);
649 }
650
651 mutex_enter(&ah->ah_lock);
652
653 /* Save a copy of the current UDAV data in old_udav. */
654 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
655
656 /*
657 * Fill in the new UDAV with the caller's data, passed in via the
658 * "ibt_adds_vect_t" structure.
659 *
660 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
661 * field here (just as we did during hermon_ah_alloc()) because we
662 * may need to return it later to the IBTF (as a result of a
663 * subsequent query operation). As explained in hermon_ah_alloc(),
664 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
665 * is not always preserved. The reason for this is described in
666 * hermon_set_addr_path().
667 */
668 status = hermon_set_addr_path(state, attr_p,
669 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
670 if (status != DDI_SUCCESS) {
671 mutex_exit(&ah->ah_lock);
672 return (status);
673 }
674 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
675 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
676 ah->ah_udav->sl = attr_p->av_srvl;
677
678 /*
679 * Copy changes into the new UDAV.
680 * Note: We copy in 64-bit chunks. For the first two of these
681 * chunks it is necessary to read the current contents of the
682 * UDAV, mask off the modifiable portions (maintaining any
683 * of the "reserved" portions), and then mask on the new data.
684 */
685 size = sizeof (hermon_hw_udav_t) >> 3;
686 for (i = 0; i < size; i++) {
687 data_old = ((uint64_t *)&old_udav)[i];
688
689 /*
690 * Apply mask to change only the relevant values.
691 */
692 if (i == 0) {
693 data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
694 } else if (i == 1) {
695 data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
696 } else {
697 data_old = 0;
698 }
699
700 /* Store the updated values to the UDAV */
701 ((uint64_t *)ah->ah_udav)[i] |= data_old;
702 }
703
704 /*
705 * Put the valid PD number back into the UDAV entry, as it
706 * might have been clobbered above.
707 */
708 ah->ah_udav->pd = old_udav.pd;
709
710
711 mutex_exit(&ah->ah_lock);
712 return (DDI_SUCCESS);
713 }
714
715 /*
716 * hermon_mcg_attach()
717 * Context: Can be called only from user or kernel context.
718 */
719 int
hermon_mcg_attach(hermon_state_t * state,hermon_qphdl_t qp,ib_gid_t gid,ib_lid_t lid)720 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
721 ib_lid_t lid)
722 {
723 hermon_rsrc_t *rsrc;
724 hermon_hw_mcg_t *mcg_entry;
725 hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
726 hermon_mcghdl_t mcg, newmcg;
727 uint64_t mgid_hash;
728 uint32_t end_indx;
729 int status;
730 uint_t qp_found;
731
732 /*
733 * It is only allowed to attach MCG to UD queue pairs. Verify
734 * that the intended QP is of the appropriate transport type
735 */
736 if (qp->qp_serv_type != HERMON_QP_UD) {
737 return (IBT_QP_SRV_TYPE_INVALID);
738 }
739
740 /*
741 * Check for invalid Multicast DLID. Specifically, all Multicast
742 * LIDs should be within a well defined range. If the specified LID
743 * is outside of that range, then return an error.
744 */
745 if (hermon_mlid_is_valid(lid) == 0) {
746 return (IBT_MC_MLID_INVALID);
747 }
748 /*
749 * Check for invalid Multicast GID. All Multicast GIDs should have
750 * a well-defined pattern of bits and flags that are allowable. If
751 * the specified GID does not meet the criteria, then return an error.
752 */
753 if (hermon_mgid_is_valid(gid) == 0) {
754 return (IBT_MC_MGID_INVALID);
755 }
756
757 /*
758 * Compute the MGID hash value. Since the MCG table is arranged as
759 * a number of separate hash chains, this operation converts the
760 * specified MGID into the starting index of an entry in the hash
761 * table (i.e. the index for the start of the appropriate hash chain).
762 * Subsequent operations below will walk the chain searching for the
763 * right place to add this new QP.
764 */
765 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
766 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
767 if (status != HERMON_CMD_SUCCESS) {
768 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
769 status);
770 if (status == HERMON_CMD_INVALID_STATUS) {
771 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
772 }
773 return (ibc_get_ci_failure(0));
774 }
775
776 /*
777 * Grab the multicast group mutex. Then grab the pre-allocated
778 * temporary buffer used for holding and/or modifying MCG entries.
779 * Zero out the temporary MCG entry before we begin.
780 */
781 mutex_enter(&state->hs_mcglock);
782 mcg_entry = state->hs_mcgtmp;
783 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
784 bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
785
786 /*
787 * Walk through the array of MCG entries starting at "mgid_hash".
788 * Try to find the appropriate place for this new QP to be added.
789 * This could happen when the first entry of the chain has MGID == 0
790 * (which means that the hash chain is empty), or because we find
791 * an entry with the same MGID (in which case we'll add the QP to
792 * that MCG), or because we come to the end of the chain (in which
793 * case this is the first QP being added to the multicast group that
794 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine
795 * walks the list and returns an index into the MCG table. The entry
796 * at this index is then checked to determine which case we have
797 * fallen into (see below). Note: We are using the "shadow" MCG
798 * list (of hermon_mcg_t structs) for this lookup because the real
799 * MCG entries are in hardware (and the lookup process would be much
800 * more time consuming).
801 */
802 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
803 mcg = &state->hs_mcghdl[end_indx];
804
805 /*
806 * If MGID == 0, then the hash chain is empty. Just fill in the
807 * current entry. Note: No need to allocate an MCG table entry
808 * as all the hash chain "heads" are already preallocated.
809 */
810 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
811
812 /* Fill in the current entry in the "shadow" MCG list */
813 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
814
815 /*
816 * Try to add the new QP number to the list. This (and the
817 * above) routine fills in a temporary MCG. The "mcg_entry"
818 * and "mcg_entry_qplist" pointers simply point to different
819 * offsets within the same temporary copy of the MCG (for
820 * convenience). Note: If this fails, we need to invalidate
821 * the entries we've already put into the "shadow" list entry
822 * above.
823 */
824 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
825 &qp_found);
826 if (status != DDI_SUCCESS) {
827 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
828 mutex_exit(&state->hs_mcglock);
829 return (status);
830 }
831 if (!qp_found)
832 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
833 /* set the member count */
834
835 /*
836 * Once the temporary MCG has been filled in, write the entry
837 * into the appropriate location in the Hermon MCG entry table.
838 * If it's successful, then drop the lock and return success.
839 * Note: In general, this operation shouldn't fail. If it
840 * does, then it is an indication that something (probably in
841 * HW, but maybe in SW) has gone seriously wrong. We still
842 * want to zero out the entries that we've filled in above
843 * (in the hermon_mcg_setup_new_hdr() routine).
844 */
845 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
846 HERMON_CMD_NOSLEEP_SPIN);
847 if (status != HERMON_CMD_SUCCESS) {
848 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
849 mutex_exit(&state->hs_mcglock);
850 HERMON_WARNING(state, "failed to write MCG entry");
851 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
852 "%08x\n", status);
853 if (status == HERMON_CMD_INVALID_STATUS) {
854 hermon_fm_ereport(state, HCA_SYS_ERR,
855 HCA_ERR_SRV_LOST);
856 }
857 return (ibc_get_ci_failure(0));
858 }
859
860 /*
861 * Now that we know all the Hermon firmware accesses have been
862 * successful, we update the "shadow" MCG entry by incrementing
863 * the "number of attached QPs" count.
864 *
865 * We increment only if the QP is not already part of the
866 * MCG by checking the 'qp_found' flag returned from the
867 * qplist_add above.
868 */
869 if (!qp_found) {
870 mcg->mcg_num_qps++;
871
872 /*
873 * Increment the refcnt for this QP. Because the QP
874 * was added to this MCG, the refcnt must be
875 * incremented.
876 */
877 hermon_qp_mcg_refcnt_inc(qp);
878 }
879
880 /*
881 * We drop the lock and return success.
882 */
883 mutex_exit(&state->hs_mcglock);
884 return (DDI_SUCCESS);
885 }
886
887 /*
888 * If the specified MGID matches the MGID in the current entry, then
889 * we need to try to add the QP to the current MCG entry. In this
890 * case, it means that we need to read the existing MCG entry (into
891 * the temporary MCG), add the new QP number to the temporary entry
892 * (using the same method we used above), and write the entry back
893 * to the hardware (same as above).
894 */
895 if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
896 (mcg->mcg_mgid_l == gid.gid_guid)) {
897
898 /*
899 * Read the current MCG entry into the temporary MCG. Note:
900 * In general, this operation shouldn't fail. If it does,
901 * then it is an indication that something (probably in HW,
902 * but maybe in SW) has gone seriously wrong.
903 */
904 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
905 HERMON_CMD_NOSLEEP_SPIN);
906 if (status != HERMON_CMD_SUCCESS) {
907 mutex_exit(&state->hs_mcglock);
908 HERMON_WARNING(state, "failed to read MCG entry");
909 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
910 "%08x\n", status);
911 if (status == HERMON_CMD_INVALID_STATUS) {
912 hermon_fm_ereport(state, HCA_SYS_ERR,
913 HCA_ERR_SRV_LOST);
914 }
915 return (ibc_get_ci_failure(0));
916 }
917
918 /*
919 * Try to add the new QP number to the list. This routine
920 * fills in the necessary pieces of the temporary MCG. The
921 * "mcg_entry_qplist" pointer is used to point to the portion
922 * of the temporary MCG that holds the QP numbers.
923 *
924 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
925 * already found the QP in the list. In this case, the QP is
926 * not added on to the list again. Check the flag 'qp_found'
927 * if this value is needed to be known.
928 *
929 */
930 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
931 &qp_found);
932 if (status != DDI_SUCCESS) {
933 mutex_exit(&state->hs_mcglock);
934 return (status);
935 }
936 if (!qp_found)
937 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
938 /* set the member count */
939
940 /*
941 * Once the temporary MCG has been updated, write the entry
942 * into the appropriate location in the Hermon MCG entry table.
943 * If it's successful, then drop the lock and return success.
944 * Note: In general, this operation shouldn't fail. If it
945 * does, then it is an indication that something (probably in
946 * HW, but maybe in SW) has gone seriously wrong.
947 */
948 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
949 HERMON_CMD_NOSLEEP_SPIN);
950 if (status != HERMON_CMD_SUCCESS) {
951 mutex_exit(&state->hs_mcglock);
952 HERMON_WARNING(state, "failed to write MCG entry");
953 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
954 "%08x\n", status);
955 if (status == HERMON_CMD_INVALID_STATUS) {
956 hermon_fm_ereport(state, HCA_SYS_ERR,
957 HCA_ERR_SRV_LOST);
958 }
959 return (ibc_get_ci_failure(0));
960 }
961
962 /*
963 * Now that we know all the Hermon firmware accesses have been
964 * successful, we update the current "shadow" MCG entry by
965 * incrementing the "number of attached QPs" count.
966 *
967 * We increment only if the QP is not already part of the
	 * MCG by checking the 'qp_found' flag returned from the
	 * qplist_add above.
970 */
971 if (!qp_found) {
972 mcg->mcg_num_qps++;
973
974 /*
975 * Increment the refcnt for this QP. Because the QP
976 * was added to this MCG, the refcnt must be
977 * incremented.
978 */
979 hermon_qp_mcg_refcnt_inc(qp);
980 }
981
982 /*
983 * We drop the lock and return success.
984 */
985 mutex_exit(&state->hs_mcglock);
986 return (DDI_SUCCESS);
987 }
988
989 /*
990 * If we've reached here, then we're at the end of the hash chain.
991 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
992 * and update the previous entry to link the new one to the end of the
993 * chain.
994 */
995
996 /*
997 * Allocate an MCG table entry. This will be filled in with all
998 * the necessary parameters to define the multicast group. Then it
999 * will be written to the hardware in the next-to-last step below.
1000 */
1001 status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
1002 if (status != DDI_SUCCESS) {
1003 mutex_exit(&state->hs_mcglock);
1004 return (IBT_INSUFF_RESOURCE);
1005 }
1006
1007 /*
1008 * Fill in the new entry in the "shadow" MCG list. Note: Just as
1009 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1010 * of the temporary MCG entry (the rest of which will be filled in by
1011 * hermon_mcg_qplist_add() below)
1012 */
1013 newmcg = &state->hs_mcghdl[rsrc->hr_indx];
1014 hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
1015
1016 /*
1017 * Try to add the new QP number to the list. This routine fills in
1018 * the final necessary pieces of the temporary MCG. The
1019 * "mcg_entry_qplist" pointer is used to point to the portion of the
1020 * temporary MCG that holds the QP numbers. If we fail here, we
1021 * must undo the previous resource allocation.
1022 *
	 * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
1024 * found the QP in the list. In this case, the QP is not added on to
1025 * the list again. Check the flag 'qp_found' if this value is needed
1026 * to be known.
1027 */
1028 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
1029 &qp_found);
1030 if (status != DDI_SUCCESS) {
1031 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1032 hermon_rsrc_free(state, &rsrc);
1033 mutex_exit(&state->hs_mcglock);
1034 return (status);
1035 }
1036 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1037 /* set the member count */
1038
1039 /*
1040 * Once the temporary MCG has been updated, write the entry into the
1041 * appropriate location in the Hermon MCG entry table. If this is
1042 * successful, then we need to chain the previous entry to this one.
1043 * Note: In general, this operation shouldn't fail. If it does, then
1044 * it is an indication that something (probably in HW, but maybe in
1045 * SW) has gone seriously wrong.
1046 */
1047 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1048 HERMON_CMD_NOSLEEP_SPIN);
1049 if (status != HERMON_CMD_SUCCESS) {
1050 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1051 hermon_rsrc_free(state, &rsrc);
1052 mutex_exit(&state->hs_mcglock);
1053 HERMON_WARNING(state, "failed to write MCG entry");
1054 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1055 status);
1056 if (status == HERMON_CMD_INVALID_STATUS) {
1057 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1058 }
1059 return (ibc_get_ci_failure(0));
1060 }
1061
1062 /*
1063 * Now read the current MCG entry (the one previously at the end of
1064 * hash chain) into the temporary MCG. We are going to update its
1065 * "next_gid_indx" now and write the entry back to the MCG table.
1066 * Note: In general, this operation shouldn't fail. If it does, then
1067 * it is an indication that something (probably in HW, but maybe in SW)
1068 * has gone seriously wrong. We will free up the MCG entry resource,
1069 * but we will not undo the previously written MCG entry in the HW.
1070 * This is OK, though, because the MCG entry is not currently attached
1071 * to any hash chain.
1072 */
1073 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1074 HERMON_CMD_NOSLEEP_SPIN);
1075 if (status != HERMON_CMD_SUCCESS) {
1076 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1077 hermon_rsrc_free(state, &rsrc);
1078 mutex_exit(&state->hs_mcglock);
1079 HERMON_WARNING(state, "failed to read MCG entry");
1080 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1081 status);
1082 if (status == HERMON_CMD_INVALID_STATUS) {
1083 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1084 }
1085 return (ibc_get_ci_failure(0));
1086 }
1087
1088 /*
1089 * Finally, we update the "next_gid_indx" field in the temporary MCG
1090 * and attempt to write the entry back into the Hermon MCG table. If
1091 * this succeeds, then we update the "shadow" list to reflect the
1092 * change, drop the lock, and return success. Note: In general, this
1093 * operation shouldn't fail. If it does, then it is an indication
1094 * that something (probably in HW, but maybe in SW) has gone seriously
1095 * wrong. Just as we do above, we will free up the MCG entry resource,
1096 * but we will not try to undo the previously written MCG entry. This
1097 * is OK, though, because (since we failed here to update the end of
1098 * the chain) that other entry is not currently attached to any chain.
1099 */
1100 mcg_entry->next_gid_indx = rsrc->hr_indx;
1101 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1102 HERMON_CMD_NOSLEEP_SPIN);
1103 if (status != HERMON_CMD_SUCCESS) {
1104 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1105 hermon_rsrc_free(state, &rsrc);
1106 mutex_exit(&state->hs_mcglock);
1107 HERMON_WARNING(state, "failed to write MCG entry");
1108 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1109 status);
1110 if (status == HERMON_CMD_INVALID_STATUS) {
1111 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1112 }
1113 return (ibc_get_ci_failure(0));
1114 }
1115 mcg = &state->hs_mcghdl[end_indx];
1116 mcg->mcg_next_indx = rsrc->hr_indx;
1117
1118 /*
1119 * Now that we know all the Hermon firmware accesses have been
1120 * successful, we update the new "shadow" MCG entry by incrementing
1121 * the "number of attached QPs" count. Then we drop the lock and
1122 * return success.
1123 */
1124 newmcg->mcg_num_qps++;
1125
1126 /*
1127 * Increment the refcnt for this QP. Because the QP
1128 * was added to this MCG, the refcnt must be
1129 * incremented.
1130 */
1131 hermon_qp_mcg_refcnt_inc(qp);
1132
1133 mutex_exit(&state->hs_mcglock);
1134 return (DDI_SUCCESS);
1135 }
1136
1137
1138 /*
1139 * hermon_mcg_detach()
1140 * Context: Can be called only from user or kernel context.
1141 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	/*
	 * Check for invalid Multicast DLID. Specifically, all Multicast
	 * LIDs should be within a well defined range. If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (hermon_mlid_is_valid(lid) == 0) {
		return (IBT_MC_MLID_INVALID);
	}

	/*
	 * Compute the MGID hash value. As described above, the MCG table is
	 * arranged as a number of separate hash chains. This operation
	 * converts the specified MGID into the starting index of an entry in
	 * the hash table (i.e. the index for the start of the appropriate
	 * hash chain). Subsequent operations below will walk the chain
	 * searching for a matching entry from which to attempt to remove
	 * the specified QP.
	 */
	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex. Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 * Note: the lock is held across all of the firmware accesses below,
	 * and must be dropped on every exit path from here on.
	 */
	mutex_enter(&state->hs_mcglock);
	mcg_entry = state->hs_mcgtmp;
	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find an MCG entry with a matching MGID. The
	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
	 * index into the MCG table. The entry at this index is checked to
	 * determine whether it is a match or not. If it is a match, then
	 * we continue on to attempt to remove the QP from the MCG. If it
	 * is not a match (or not a valid MCG entry), then we return an error.
	 * The "prev_indx" is saved in case the entry itself must later be
	 * unlinked from the hash chain.
	 */
	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
	mcg = &state->hs_mcghdl[end_indx];

	/*
	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
	 * does not match the MGID in the current entry, then return
	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
	 * valid).
	 */
	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
	    (mcg->mcg_mgid_l != gid.gid_guid))) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_MC_MGID_INVALID);
	}

	/*
	 * Read the current MCG entry into the temporary MCG. Note: In
	 * general, this operation shouldn't fail. If it does, then it is
	 * an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Search the QP number list for a match. If a match is found, then
	 * remove the entry from the QP list. Otherwise, if no match is found,
	 * return an error.
	 */
	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}

	/*
	 * Decrement the MCG count for this QP. When the 'qp_mcg'
	 * field becomes 0, then this QP is no longer a member of any
	 * MCG.
	 */
	hermon_qp_mcg_refcnt_dec(qp);

	/*
	 * If the current MCG's QP number list is about to be made empty
	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
	 * chain. Otherwise, just write the updated MCG entry back to the
	 * hardware. In either case, once we successfully update the hardware
	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
	 * count (or zero out the entire "shadow" list entry) before returning
	 * success. Note: Zeroing out the "shadow" list entry is done
	 * inside of hermon_mcg_hash_list_remove().
	 */
	if (mcg->mcg_num_qps == 1) {

		/* Remove an MCG entry from the hash chain */
		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
		    mcg_entry);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}

	} else {
		/*
		 * Write the updated MCG entry back to the Hermon MCG table.
		 * If this succeeds, then we update the "shadow" list to
		 * reflect the change (i.e. decrement the "mcg_num_qps"),
		 * drop the lock, and return success. Note: In general,
		 * this operation shouldn't fail. If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		/* HW update succeeded; now update the "shadow" count */
		mcg->mcg_num_qps--;
	}

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}
1302
1303 /*
1304 * hermon_qp_mcg_refcnt_inc()
1305 * Context: Can be called from interrupt or base context.
1306 */
1307 static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)1308 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1309 {
1310 /* Increment the QP's MCG reference count */
1311 mutex_enter(&qp->qp_lock);
1312 qp->qp_mcg_refcnt++;
1313 mutex_exit(&qp->qp_lock);
1314 }
1315
1316
1317 /*
1318 * hermon_qp_mcg_refcnt_dec()
1319 * Context: Can be called from interrupt or base context.
1320 */
1321 static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)1322 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1323 {
1324 /* Decrement the QP's MCG reference count */
1325 mutex_enter(&qp->qp_lock);
1326 qp->qp_mcg_refcnt--;
1327 mutex_exit(&qp->qp_lock);
1328 }
1329
1330
1331 /*
1332 * hermon_mcg_qplist_add()
1333 * Context: Can be called from interrupt or base context.
1334 */
1335 static int
hermon_mcg_qplist_add(hermon_state_t * state,hermon_mcghdl_t mcg,hermon_hw_mcg_qp_list_t * mcg_qplist,hermon_qphdl_t qp,uint_t * qp_found)1336 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1337 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1338 uint_t *qp_found)
1339 {
1340 uint_t qplist_indx;
1341
1342 ASSERT(MUTEX_HELD(&state->hs_mcglock));
1343
1344 qplist_indx = mcg->mcg_num_qps;
1345
1346 /*
1347 * Determine if we have exceeded the maximum number of QP per
1348 * multicast group. If we have, then return an error
1349 */
1350 if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1351 return (IBT_HCA_MCG_QP_EXCEEDED);
1352 }
1353
1354 /*
1355 * Determine if the QP is already attached to this MCG table. If it
1356 * is, then we break out and treat this operation as a NO-OP
1357 */
1358 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1359 qplist_indx++) {
1360 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1361 break;
1362 }
1363 }
1364
1365 /*
1366 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1367 * return SUCCESS in this case, but the qplist will not have been
1368 * updated because the QP was already on the list.
1369 */
1370 if (qplist_indx < mcg->mcg_num_qps) {
1371 *qp_found = 1;
1372 } else {
1373 /*
1374 * Otherwise, append the new QP number to the end of the
1375 * current QP list. Note: We will increment the "mcg_num_qps"
1376 * field on the "shadow" MCG list entry later (after we know
1377 * that all necessary Hermon firmware accesses have been
1378 * successful).
1379 *
1380 * Set 'qp_found' to 0 so we know the QP was added on to the
1381 * list for sure.
1382 */
1383 mcg_qplist[qplist_indx].qpn =
1384 (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1385 *qp_found = 0;
1386 }
1387
1388 return (DDI_SUCCESS);
1389 }
1390
1391
1392
1393 /*
1394 * hermon_mcg_qplist_remove()
1395 * Context: Can be called from interrupt or base context.
1396 */
1397 static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,hermon_hw_mcg_qp_list_t * mcg_qplist,hermon_qphdl_t qp)1398 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1399 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1400 {
1401 uint_t i, qplist_indx;
1402
1403 /*
1404 * Search the MCG QP list for a matching QPN. When
1405 * it's found, we swap the last entry with the current
1406 * one, set the last entry to zero, decrement the last
1407 * entry, and return. If it's not found, then it's
1408 * and error.
1409 */
1410 qplist_indx = mcg->mcg_num_qps;
1411 for (i = 0; i < qplist_indx; i++) {
1412 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1413 mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1414 mcg_qplist[qplist_indx - 1].qpn = 0;
1415
1416 return (DDI_SUCCESS);
1417 }
1418 }
1419
1420 return (IBT_QP_HDL_INVALID);
1421 }
1422
1423
1424 /*
1425 * hermon_mcg_walk_mgid_hash()
1426 * Context: Can be called from interrupt or base context.
1427 */
1428 static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t * state,uint64_t start_indx,ib_gid_t mgid,uint_t * p_indx)1429 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1430 ib_gid_t mgid, uint_t *p_indx)
1431 {
1432 hermon_mcghdl_t curr_mcghdl;
1433 uint_t curr_indx, prev_indx;
1434
1435 ASSERT(MUTEX_HELD(&state->hs_mcglock));
1436
1437 /* Start at the head of the hash chain */
1438 curr_indx = (uint_t)start_indx;
1439 prev_indx = curr_indx;
1440 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1441
1442 /* If the first entry in the chain has MGID == 0, then stop */
1443 if ((curr_mcghdl->mcg_mgid_h == 0) &&
1444 (curr_mcghdl->mcg_mgid_l == 0)) {
1445 goto end_mgid_hash_walk;
1446 }
1447
1448 /* If the first entry in the chain matches the MGID, then stop */
1449 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1450 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1451 goto end_mgid_hash_walk;
1452 }
1453
1454 /* Otherwise, walk the hash chain looking for a match */
1455 while (curr_mcghdl->mcg_next_indx != 0) {
1456 prev_indx = curr_indx;
1457 curr_indx = curr_mcghdl->mcg_next_indx;
1458 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1459
1460 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1461 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1462 break;
1463 }
1464 }
1465
1466 end_mgid_hash_walk:
1467 /*
1468 * If necessary, return the index of the previous entry too. This
1469 * is primarily used for detaching a QP from a multicast group. It
1470 * may be necessary, in that case, to delete an MCG entry from the
1471 * hash chain and having the index of the previous entry is helpful.
1472 */
1473 if (p_indx != NULL) {
1474 *p_indx = prev_indx;
1475 }
1476 return (curr_indx);
1477 }
1478
1479
1480 /*
1481 * hermon_mcg_setup_new_hdr()
1482 * Context: Can be called from interrupt or base context.
1483 */
1484 static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,hermon_hw_mcg_t * mcg_hdr,ib_gid_t mgid,hermon_rsrc_t * mcg_rsrc)1485 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1486 ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1487 {
1488 /*
1489 * Fill in the fields of the "shadow" entry used by software
1490 * to track MCG hardware entry
1491 */
1492 mcg->mcg_mgid_h = mgid.gid_prefix;
1493 mcg->mcg_mgid_l = mgid.gid_guid;
1494 mcg->mcg_rsrcp = mcg_rsrc;
1495 mcg->mcg_next_indx = 0;
1496 mcg->mcg_num_qps = 0;
1497
1498 /*
1499 * Fill the header fields of the MCG entry (in the temporary copy)
1500 */
1501 mcg_hdr->mgid_h = mgid.gid_prefix;
1502 mcg_hdr->mgid_l = mgid.gid_guid;
1503 mcg_hdr->next_gid_indx = 0;
1504 }
1505
1506
1507 /*
1508 * hermon_mcg_hash_list_remove()
1509 * Context: Can be called only from user or kernel context.
1510 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->hs_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain. If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note: Invalidating an MCG entry means writing all zeros
		 * to the entry. This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains. Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = hermon_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->hs_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG. Note:
		 * In general, this operation shouldn't fail. If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index. This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one. If this is successful, then we can do the
		 * same operation for the "shadow" list. And we can also
		 * free up the Hermon MCG entry resource that was associated
		 * with the (old) next entry. Note: In general, this
		 * operation shouldn't fail. If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list. Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

		/*
		 * Free up the Hermon MCG entry resource used by the "next"
		 * MCG entry. That resource is no longer needed by any
		 * MCG entry which is first on a hash chain (like the "next"
		 * entry has just become). Note: after the bcopy() above,
		 * "curr_mcg->mcg_rsrcp" refers to the resource that was
		 * owned by the (old) next entry.
		 */
		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain. By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry. Note: We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the previous MCG entry into the temporary MCG. Note: In
	 * general, this operation shouldn't fail. If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table. If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Hermon MCG entry resource that was associated
	 * with the current entry, and return success. Note: In general,
	 * this operation shouldn't fail. If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG. Update its "mcg_next_indx" to point to the next entry
	 * the one after the current entry. Note: This next index may be
	 * zero, indicating the end of the list.
	 */
	prev_mcg = &state->hs_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Hermon MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry. Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

	return (DDI_SUCCESS);
}
1689
1690
1691 /*
1692 * hermon_mcg_entry_invalidate()
1693 * Context: Can be called only from user or kernel context.
1694 */
1695 static int
hermon_mcg_entry_invalidate(hermon_state_t * state,hermon_hw_mcg_t * mcg_entry,uint_t indx)1696 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1697 uint_t indx)
1698 {
1699 int status;
1700
1701 /*
1702 * Invalidate the hardware MCG entry by zeroing out this temporary
1703 * MCG and writing it the the hardware. Note: In general, this
1704 * operation shouldn't fail. If it does, then it is an indication
1705 * that something (probably in HW, but maybe in SW) has gone seriously
1706 * wrong.
1707 */
1708 bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1709 status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1710 HERMON_CMD_NOSLEEP_SPIN);
1711 if (status != HERMON_CMD_SUCCESS) {
1712 HERMON_WARNING(state, "failed to write MCG entry");
1713 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1714 status);
1715 if (status == HERMON_CMD_INVALID_STATUS) {
1716 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1717 }
1718 return (ibc_get_ci_failure(0));
1719 }
1720
1721 return (DDI_SUCCESS);
1722 }
1723
1724
1725 /*
1726 * hermon_mgid_is_valid()
1727 * Context: Can be called from interrupt or base context.
1728 */
1729 static int
hermon_mgid_is_valid(ib_gid_t gid)1730 hermon_mgid_is_valid(ib_gid_t gid)
1731 {
1732 uint_t topbits, flags, scope;
1733
1734 /*
1735 * According to IBA 1.1 specification (section 4.1.1) a valid
1736 * "multicast GID" must have its top eight bits set to all ones
1737 */
1738 topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1739 HERMON_MCG_TOPBITS_MASK;
1740 if (topbits != HERMON_MCG_TOPBITS) {
1741 return (0);
1742 }
1743
1744 /*
1745 * The next 4 bits are the "flag" bits. These are valid only
1746 * if they are "0" (which correspond to permanently assigned/
1747 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1748 * multicast GIDs). All other values are reserved.
1749 */
1750 flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1751 HERMON_MCG_FLAGS_MASK;
1752 if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1753 (flags == HERMON_MCG_FLAGS_NONPERM))) {
1754 return (0);
1755 }
1756
1757 /*
1758 * The next 4 bits are the "scope" bits. These are valid only
1759 * if they are "2" (Link-local), "5" (Site-local), "8"
1760 * (Organization-local) or "E" (Global). All other values
1761 * are reserved (or currently unassigned).
1762 */
1763 scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1764 HERMON_MCG_SCOPE_MASK;
1765 if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1766 (scope == HERMON_MCG_SCOPE_SITELOC) ||
1767 (scope == HERMON_MCG_SCOPE_ORGLOC) ||
1768 (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1769 return (0);
1770 }
1771
1772 /*
1773 * If it passes all of the above checks, then we will consider it
1774 * a valid multicast GID.
1775 */
1776 return (1);
1777 }
1778
1779
1780 /*
1781 * hermon_mlid_is_valid()
1782 * Context: Can be called from interrupt or base context.
1783 */
1784 static int
hermon_mlid_is_valid(ib_lid_t lid)1785 hermon_mlid_is_valid(ib_lid_t lid)
1786 {
1787 /*
1788 * According to IBA 1.1 specification (section 4.1.1) a valid
1789 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1790 */
1791 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1792 return (0);
1793 }
1794
1795 return (1);
1796 }
1797
1798
1799 /*
1800 * hermon_pd_alloc()
1801 * Context: Can be called only from user or kernel context.
1802 */
1803 int
hermon_pd_alloc(hermon_state_t * state,hermon_pdhdl_t * pdhdl,uint_t sleepflag)1804 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1805 {
1806 hermon_rsrc_t *rsrc;
1807 hermon_pdhdl_t pd;
1808 int status;
1809
1810 /*
1811 * Allocate the software structure for tracking the protection domain
1812 * (i.e. the Hermon Protection Domain handle). By default each PD
1813 * structure will have a unique PD number assigned to it. All that
1814 * is necessary is for software to initialize the PD reference count
1815 * (to zero) and return success.
1816 */
1817 status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1818 if (status != DDI_SUCCESS) {
1819 return (IBT_INSUFF_RESOURCE);
1820 }
1821 pd = (hermon_pdhdl_t)rsrc->hr_addr;
1822 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1823
1824 pd->pd_refcnt = 0;
1825 *pdhdl = pd;
1826
1827 return (DDI_SUCCESS);
1828 }
1829
1830
1831 /*
1832 * hermon_pd_free()
1833 * Context: Can be called only from user or kernel context.
1834 */
1835 int
hermon_pd_free(hermon_state_t * state,hermon_pdhdl_t * pdhdl)1836 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1837 {
1838 hermon_rsrc_t *rsrc;
1839 hermon_pdhdl_t pd;
1840
1841 /*
1842 * Pull all the necessary information from the Hermon Protection Domain
1843 * handle. This is necessary here because the resource for the
1844 * PD is going to be freed up as part of this operation.
1845 */
1846 pd = *pdhdl;
1847 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1848 rsrc = pd->pd_rsrcp;
1849
1850 /*
1851 * Check the PD reference count. If the reference count is non-zero,
1852 * then it means that this protection domain is still referenced by
1853 * some memory region, queue pair, address handle, or other IB object
1854 * If it is non-zero, then return an error. Otherwise, free the
1855 * Hermon resource and return success.
1856 */
1857 if (pd->pd_refcnt != 0) {
1858 return (IBT_PD_IN_USE);
1859 }
1860
1861 /* Free the Hermon Protection Domain handle */
1862 hermon_rsrc_free(state, &rsrc);
1863
1864 /* Set the pdhdl pointer to NULL and return success */
1865 *pdhdl = (hermon_pdhdl_t)NULL;
1866
1867 return (DDI_SUCCESS);
1868 }
1869
1870
1871 /*
1872 * hermon_pd_refcnt_inc()
1873 * Context: Can be called from interrupt or base context.
1874 */
1875 void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)1876 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1877 {
1878 /* Increment the protection domain's reference count */
1879 atomic_inc_32(&pd->pd_refcnt);
1880 }
1881
1882
1883 /*
1884 * hermon_pd_refcnt_dec()
1885 * Context: Can be called from interrupt or base context.
1886 */
1887 void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)1888 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1889 {
1890 /* Decrement the protection domain's reference count */
1891 atomic_dec_32(&pd->pd_refcnt);
1892 }
1893
1894
1895 /*
1896 * hermon_port_query()
1897 * Context: Can be called only from user or kernel context.
1898 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t	portinfo;
	sm_guidinfo_t	guidinfo;
	sm_pkey_table_t	pkeytable;
	ib_gid_t	*sgid;
	uint_t		sgid_max, pkey_max, tbl_size;
	int		i, j, indx, status;
	ib_pkey_t	*pkeyp;
	ib_guid_t	*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	/*
	 * Driver-private shadow copies of the PKey and GUID tables for
	 * this port; refreshed below alongside the caller's tables.
	 */
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
		    "failed: %08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			/* Firmware is unresponsive: report service lost */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Parse the PortInfo MAD and fill in the IBTF structure
	 */
	pi->p_base_lid		= portinfo.LID;
	pi->p_qkey_violations	= portinfo.Q_KeyViolations;
	pi->p_pkey_violations	= portinfo.P_KeyViolations;
	pi->p_sm_sl		= portinfo.MasterSMSL;
	pi->p_sm_lid		= portinfo.MasterSMLID;
	pi->p_linkstate		= portinfo.PortState;
	pi->p_port_num		= portinfo.LocalPortNum;
	pi->p_phys_state	= portinfo.PortPhysicalState;
	pi->p_width_supported	= portinfo.LinkWidthSupported;
	pi->p_width_enabled	= portinfo.LinkWidthEnabled;
	pi->p_width_active	= portinfo.LinkWidthActive;
	pi->p_speed_supported	= portinfo.LinkSpeedSupported;
	pi->p_speed_enabled	= portinfo.LinkSpeedEnabled;
	pi->p_speed_active	= portinfo.LinkSpeedActive;
	pi->p_mtu		= portinfo.MTUCap;
	pi->p_lmc		= portinfo.LMC;
	pi->p_max_vl		= portinfo.VLCap;
	pi->p_subnet_timeout	= portinfo.SubnetTimeOut;
	pi->p_msg_sz		= ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
	/* Table sizes come from the configuration profile, not the MAD */
	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
	pi->p_sgid_tbl_sz = (1 << tbl_size);
	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
	pi->p_pkey_tbl_sz = (1 << tbl_size);
	/* Cache the subnet prefix for later GID construction */
	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  NOTE(review): bits are OR-ed into
	 * p_capabilities without clearing it first -- assumes the caller
	 * supplies a zeroed ibt_hca_portinfo_t; confirm against callers.
	 */
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
		pi->p_capabilities |= IBT_PORT_CAP_SM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_DM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;

	/*
	 * Fill in the SGID table.  Since the only access to the Hermon
	 * GID tables is through the firmware's MAD_IFC interface, we
	 * post as many GetGUIDInfo MADs as necessary to read in the entire
	 * contents of the SGID table (for the specified port).  Note: The
	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
	 * GUIDs are then appended to the GID prefix for the port (from the
	 * GetPortInfo above) to form the entire SGID table.
	 */
	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
		/* "i >> 3" is the 8-entry block index expected by the MAD */
		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
		for (j = 0; j < sgid_max; j++) {
			indx = (i + j);
			sgid = &pi->p_sgid_tbl[indx];
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
			sgid->gid_prefix = portinfo.GidPrefix;
			/* Update the driver's shadow GUID table too */
			guidp[indx] = sgid->gid_guid =
			    guidinfo.GUIDBlocks[j];
		}
	}

	/*
	 * Fill in the PKey table.  Just as for the GID tables above, the
	 * only access to the Hermon PKey tables is through the firmware's
	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
	 * to read in the entire contents of the PKey table (for the specified
	 * port).  Note: The GetPKeyTable command only gets 32 PKeys per
	 * operation.
	 */
	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
		status = hermon_getpkeytable_cmd_post(state, port, i,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
		for (j = 0; j < pkey_max; j++) {
			indx = (i + j);
			/* Update the driver's shadow PKey table too */
			pkeyp[indx] = pi->p_pkey_tbl[indx] =
			    pkeytable.P_KeyTableBlocks[j];
		}
	}

	return (DDI_SUCCESS);
}
2050
2051
2052 /*
2053 * hermon_port_modify()
2054 * Context: Can be called only from user or kernel context.
2055 */
/* ARGSUSED */
int
hermon_port_modify(hermon_state_t *state, uint8_t port,
    ibt_port_modify_flags_t flags, uint8_t init_type)
{
	sm_portinfo_t		portinfo;
	uint32_t		capmask;
	int			status;
	hermon_hw_set_port_t	set_port;

	/*
	 * Return an error if either of the unsupported flags are set
	 * (port shutdown and InitType are not implemented here).
	 */
	if ((flags & IBT_PORT_SHUTDOWN) ||
	    (flags & IBT_PORT_SET_INIT_TYPE)) {
		return (IBT_NOT_SUPPORTED);
	}

	bzero(&set_port, sizeof (set_port));

	/*
	 * Determine whether we are trying to reset the QKey counter
	 */
	if (flags & IBT_PORT_RESET_QKEY)
		set_port.rqk = 1;

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
	 * firmware (for the specified port number).  This returns a full
	 * PortInfo MAD (in "portinfo") from which we pull the current
	 * capability mask.  We then modify the capability mask as directed
	 * by the "pmod_flags" field, and write the updated capability mask
	 * using the Hermon SET_IB command (below).
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		if (status == HERMON_CMD_INVALID_STATUS) {
			/* Firmware unresponsive: report service lost */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  Specifically, we modify the capability
	 * mask based on the specified values.  For each capability, a
	 * RESET flag takes precedence over the corresponding SET flag.
	 */
	capmask = portinfo.CapabilityMask;

	if (flags & IBT_PORT_RESET_SM)
		capmask &= ~SM_CAP_MASK_IS_SM;
	else if (flags & IBT_PORT_SET_SM)
		capmask |= SM_CAP_MASK_IS_SM;

	if (flags & IBT_PORT_RESET_SNMP)
		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
	else if (flags & IBT_PORT_SET_SNMP)
		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;

	if (flags & IBT_PORT_RESET_DEVMGT)
		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
	else if (flags & IBT_PORT_SET_DEVMGT)
		capmask |= SM_CAP_MASK_IS_DM_SUPPD;

	if (flags & IBT_PORT_RESET_VENDOR)
		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
	else if (flags & IBT_PORT_SET_VENDOR)
		capmask |= SM_CAP_MASK_IS_VM_SUPPD;

	set_port.cap_mask = capmask;

	/*
	 * Use the Hermon SET_PORT command to update the capability mask and
	 * (possibly) reset the QKey violation counter for the specified port.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = hermon_set_port_cmd_post(state, &set_port, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to modify port capabilities");
		cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
		    "%08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}
2154
2155
2156 /*
2157 * hermon_set_addr_path()
2158 * Context: Can be called from interrupt or base context.
2159 *
2160 * Note: This routine is used for two purposes. It is used to fill in the
2161 * Hermon UDAV fields, and it is used to fill in the address path information
2162 * for QPs. Because the two Hermon structures are similar, common fields can
2163 * be filled in here. Because they are different, however, we pass
2164 * an additional flag to indicate which type is being filled and do each one
2165 * uniquely
2166 */
2167
2168 int hermon_srate_override = -1; /* allows ease of testing */
2169
2170 int
hermon_set_addr_path(hermon_state_t * state,ibt_adds_vect_t * av,hermon_hw_addr_path_t * path,uint_t type)2171 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2172 hermon_hw_addr_path_t *path, uint_t type)
2173 {
2174 uint_t gidtbl_sz;
2175 hermon_hw_udav_t *udav;
2176
2177 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2178 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2179
2180 udav = (hermon_hw_udav_t *)(void *)path;
2181 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
2182 path->mlid = av->av_src_path;
2183 path->rlid = av->av_dlid;
2184
2185 switch (av->av_srate) {
2186 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */
2187 path->max_stat_rate = 7; break;
2188 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */
2189 path->max_stat_rate = 8; break;
2190 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */
2191 path->max_stat_rate = 9; break;
2192 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */
2193 path->max_stat_rate = 10; break;
2194 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */
2195 path->max_stat_rate = 11; break;
2196 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */
2197 path->max_stat_rate = 12; break;
2198 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */
2199 path->max_stat_rate = 13; break;
2200 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */
2201 path->max_stat_rate = 14; break;
2202 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */
2203 path->max_stat_rate = 15; break;
2204 case IBT_SRATE_NOT_SPECIFIED: /* Max */
2205 path->max_stat_rate = 0; break;
2206 default:
2207 return (IBT_STATIC_RATE_INVALID);
2208 }
2209 if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2210 path->max_stat_rate = hermon_srate_override;
2211
2212 /* If "grh" flag is set, then check for valid SGID index too */
2213 gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2214 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2215 return (IBT_SGID_INVALID);
2216 }
2217
2218 /*
2219 * Fill in all "global" values regardless of the value in the GRH
2220 * flag. Because "grh" is not set unless "av_send_grh" is set, the
2221 * hardware will ignore the other "global" values as necessary. Note:
2222 * SW does this here to enable later query operations to return
2223 * exactly the same params that were passed when the addr path was
2224 * last written.
2225 */
2226 path->grh = av->av_send_grh;
2227 if (type == HERMON_ADDRPATH_QP) {
2228 path->mgid_index = av->av_sgid_ix;
2229 } else {
2230 /*
2231 * For Hermon UDAV, the "mgid_index" field is the index into
2232 * a combined table (not a per-port table), but having sections
2233 * for each port. So some extra calculations are necessary.
2234 */
2235
2236 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2237 av->av_sgid_ix;
2238
2239 udav->portnum = av->av_port_num;
2240 }
2241
2242 /*
2243 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2244 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we
2245 * only need to do it for UDAV's. So we enforce that here.
2246 *
2247 * NOTE: The entire 64 bits worth of GUID info is actually being
2248 * preserved (for UDAVs) by the callers of this function
2249 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2250 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2251 * "don't care".
2252 */
2253 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2254 path->flow_label = av->av_flow;
2255 path->tclass = av->av_tclass;
2256 path->hop_limit = av->av_hop;
2257 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2258 sizeof (uint64_t));
2259 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2260 sizeof (uint64_t));
2261 } else {
2262 path->rgid_l = 0x2;
2263 path->flow_label = 0;
2264 path->tclass = 0;
2265 path->hop_limit = 0;
2266 path->rgid_h = 0;
2267 }
2268 /* extract the default service level */
2269 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2270
2271 return (DDI_SUCCESS);
2272 }
2273
2274
2275 /*
2276 * hermon_get_addr_path()
2277 * Context: Can be called from interrupt or base context.
2278 *
2279 * Note: Just like hermon_set_addr_path() above, this routine is used for two
2280 * purposes. It is used to read in the Hermon UDAV fields, and it is used to
2281 * read in the address path information for QPs. Because the two Hermon
2282 * structures are similar, common fields can be read in here. But because
2283 * they are slightly different, we pass an additional flag to indicate which
2284 * type is being read.
2285 */
void
hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type)
{
	uint_t		gidtbl_sz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))

	av->av_src_path	= path->mlid;
	av->av_dlid	= path->rlid;

	/* Map Hermon's static rate encoding back to the IBTF value */
	switch (path->max_stat_rate) {
	case 7:				/* 1xSDR-2.5Gb/s injection rate */
		av->av_srate = IBT_SRATE_2; break;
	case 8:				/* 4xSDR-10.0Gb/s injection rate */
		av->av_srate = IBT_SRATE_10; break;
	case 9:				/* 12xSDR-30Gb/s injection rate */
		av->av_srate = IBT_SRATE_30; break;
	case 10:			/* 1xDDR-5Gb/s injection rate */
		av->av_srate = IBT_SRATE_5; break;
	case 11:			/* 4xDDR-20Gb/s injection rate */
		av->av_srate = IBT_SRATE_20; break;
	case 12:			/* 4xQDR-40Gb/s injection rate */
		av->av_srate = IBT_SRATE_40; break;
	case 13:			/* 12xDDR-60Gb/s injection rate */
		av->av_srate = IBT_SRATE_60; break;
	case 14:			/* 8xQDR-80Gb/s injection rate */
		av->av_srate = IBT_SRATE_80; break;
	case 15:			/* 12xQDR-120Gb/s injection rate */
		av->av_srate = IBT_SRATE_120; break;
	case 0:				/* max */
		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
	default:			/* 1x injection rate */
		av->av_srate = IBT_SRATE_1X;
	}

	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
	 * knows to ignore the other "global" values as necessary.  Note: SW
	 * does it this way to enable these query operations to return exactly
	 * the same params that were passed when the addr path was last written.
	 */
	av->av_send_grh = path->grh;
	if (type == HERMON_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).  Undo the
		 * per-port-section offset applied by hermon_set_addr_path().
		 */
		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);

		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
	}
	av->av_flow	= path->flow_label;
	av->av_tclass	= path->tclass;
	av->av_hop	= path->hop_limit;
	/* this is for alignment issue w/ the addr path struct in Hermon */
	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
}
2352
2353
2354 /*
2355 * hermon_portnum_is_valid()
2356 * Context: Can be called from interrupt or base context.
2357 */
2358 int
hermon_portnum_is_valid(hermon_state_t * state,uint_t portnum)2359 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2360 {
2361 uint_t max_port;
2362
2363 max_port = state->hs_cfg_profile->cp_num_ports;
2364 if ((portnum <= max_port) && (portnum != 0)) {
2365 return (1);
2366 } else {
2367 return (0);
2368 }
2369 }
2370
2371
2372 /*
2373 * hermon_pkeyindex_is_valid()
2374 * Context: Can be called from interrupt or base context.
2375 */
2376 int
hermon_pkeyindex_is_valid(hermon_state_t * state,uint_t pkeyindx)2377 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2378 {
2379 uint_t max_pkeyindx;
2380
2381 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2382 if (pkeyindx < max_pkeyindx) {
2383 return (1);
2384 } else {
2385 return (0);
2386 }
2387 }
2388
2389
2390 /*
2391 * hermon_queue_alloc()
2392 * Context: Can be called from interrupt or base context.
2393 */
int
hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
    uint_t sleepflag)
{
	ddi_dma_attr_t		dma_attr;
	int			(*callback)(caddr_t);
	uint64_t		realsize, alloc_mask;
	int			flag, status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/* Set the callback flag appropriately */
	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
	    DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then set additional
	 * alignment restrictions as necessary for the queue memory.  Also
	 * respect the configured value for IOMMU bypass
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;
#ifdef __sparc
	/* IOMMU bypass (physical addressing) is a sparc-only option */
	if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) {
		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	}
#endif

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
	    &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the values
	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
	 * to solve here is that allocating a DMA handle with IOMMU bypass
	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
	 * that are less restrictive than the page size.  Since we may need
	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
	 * (e.g. in Hermon QP work queue memory allocation), we use the
	 * following method to calculate how much additional memory to request,
	 * and we enforce our own alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		/* Over-allocate so the start can be rounded up below */
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
	 * host memory allocation, use ddi_umem_alloc().  In either case,
	 * return a pointer to the memory range allocated (including any
	 * necessary alignment adjustments), the "real" memory pointer,
	 * the "real" size, and a ddi_acc_handle_t to use when reading
	 * from/writing to the memory.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
		    (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			/* Unwind the DMA handle allocated above */
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);

	} else { /* HERMON_QUEUE_LOCATION_USERLAND */

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			/* Unwind the DMA handle allocated above */
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	}

	/*
	 * The next to last step is to ensure that the final address
	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
	 * restriction applied to it (if necessary): round it up to the
	 * next qa_alloc_align boundary within the over-allocated region.
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}
	/*
	 * The last step is to figure out the offset of the start relative
	 * to the first page of the region - will be used in the eqc/cqc
	 * passed to the HW
	 */
	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
	    qa_info->qa_buf_aligned & HERMON_PAGEOFFSET);

	return (DDI_SUCCESS);
}
2514
2515
2516 /*
2517 * hermon_queue_free()
2518 * Context: Can be called from interrupt or base context.
2519 */
2520 void
hermon_queue_free(hermon_qalloc_info_t * qa_info)2521 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2522 {
2523 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2524
2525 /*
2526 * Depending on how (i.e. from where) we allocated the memory for
2527 * this queue, we choose the appropriate method for releasing the
2528 * resources.
2529 */
2530 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2531
2532 ddi_dma_mem_free(&qa_info->qa_acchdl);
2533
2534 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2535
2536 ddi_umem_free(qa_info->qa_umemcookie);
2537
2538 }
2539
2540 /* Always free the dma handle */
2541 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2542 }
2543
2544 /*
2545 * hermon_create_fmr_pool()
2546 * Create a pool of FMRs.
2547 * Context: Can be called from kernel context only.
2548 */
2549 int
hermon_create_fmr_pool(hermon_state_t * state,hermon_pdhdl_t pd,ibt_fmr_pool_attr_t * fmr_attr,hermon_fmrhdl_t * fmrpoolp)2550 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2551 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2552 {
2553 hermon_fmrhdl_t fmrpool;
2554 hermon_fmr_list_t *fmr, *fmr_next;
2555 hermon_mrhdl_t mr;
2556 int status;
2557 int sleep;
2558 int i;
2559
2560 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2561 HERMON_NOSLEEP;
2562 if ((sleep == HERMON_SLEEP) &&
2563 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2564 return (IBT_INVALID_PARAM);
2565 }
2566
2567 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2568 if (fmrpool == NULL) {
2569 status = IBT_INSUFF_RESOURCE;
2570 goto fail;
2571 }
2572 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))
2573
2574 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2575 DDI_INTR_PRI(state->hs_intrmsi_pri));
2576 mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER,
2577 DDI_INTR_PRI(state->hs_intrmsi_pri));
2578 mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER,
2579 DDI_INTR_PRI(state->hs_intrmsi_pri));
2580
2581 fmrpool->fmr_state = state;
2582 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
2583 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg;
2584 fmrpool->fmr_pool_size = 0;
2585 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr;
2586 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz;
2587 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4;
2588 fmrpool->fmr_dirty_len = 0;
2589 fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32;
2590 fmrpool->fmr_remap_len = 0;
2591 fmrpool->fmr_flags = fmr_attr->fmr_flags;
2592 fmrpool->fmr_stat_register = 0;
2593 fmrpool->fmr_max_remaps = state->hs_cfg_profile->cp_fmr_max_remaps;
2594 fmrpool->fmr_remap_gen = 1;
2595
2596 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2597 fmrpool->fmr_dirty_list = NULL;
2598 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2599 fmrpool->fmr_remap_list = NULL;
2600 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2601 fmrpool->fmr_pool_size = fmrpool->fmr_free_len =
2602 fmr_attr->fmr_pool_size;
2603
2604 for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
2605 status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
2606 if (status != DDI_SUCCESS) {
2607 goto fail2;
2608 }
2609
2610 fmr = (hermon_fmr_list_t *)kmem_zalloc(
2611 sizeof (hermon_fmr_list_t), sleep);
2612 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2613
2614 fmr->fmr = mr;
2615 fmr->fmr_remaps = 0;
2616 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2617 fmr->fmr_pool = fmrpool;
2618 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2619 mr->mr_fmr = fmr;
2620
2621 if (!i) /* address of last entry's link */
2622 fmrpool->fmr_free_list_tail = &fmr->fmr_next;
2623 fmr->fmr_next = fmrpool->fmr_free_list;
2624 fmrpool->fmr_free_list = fmr;
2625 }
2626
2627 /* Set to return pool */
2628 *fmrpoolp = fmrpool;
2629
2630 IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS");
2631 return (IBT_SUCCESS);
2632 fail2:
2633 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2634 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2635 fmr_next = fmr->fmr_next;
2636 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2637 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2638 }
2639 kmem_free(fmrpool, sizeof (*fmrpool));
2640 fail:
2641 *fmrpoolp = NULL;
2642 IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED");
2643 if (status == DDI_FAILURE) {
2644 return (ibc_get_ci_failure(0));
2645 } else {
2646 return (status);
2647 }
2648 }
2649
2650 /*
2651 * hermon_destroy_fmr_pool()
2652 * Destroy an FMR pool and free all associated resources.
2653 * Context: Can be called from kernel context only.
2654 */
2655 int
hermon_destroy_fmr_pool(hermon_state_t * state,hermon_fmrhdl_t fmrpool)2656 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2657 {
2658 hermon_fmr_list_t *fmr, *fmr_next;
2659
2660 mutex_enter(&fmrpool->fmr_lock);
2661 hermon_fmr_cleanup(fmrpool);
2662
2663 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2664 fmr_next = fmr->fmr_next;
2665
2666 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2667 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2668
2669 --fmrpool->fmr_pool_size;
2670 }
2671 ASSERT(fmrpool->fmr_pool_size == 0);
2672 mutex_exit(&fmrpool->fmr_lock);
2673
2674 mutex_destroy(&fmrpool->fmr_lock);
2675 mutex_destroy(&fmrpool->dirty_lock);
2676 mutex_destroy(&fmrpool->remap_lock);
2677
2678 kmem_free(fmrpool, sizeof (*fmrpool));
2679 IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS");
2680 return (DDI_SUCCESS);
2681 }
2682
2683 /*
2684 * hermon_flush_fmr_pool()
2685 * Ensure that all unmapped FMRs are fully invalidated.
2686 * Context: Can be called from kernel context only.
2687 */
2688 /* ARGSUSED */
2689 int
hermon_flush_fmr_pool(hermon_state_t * state,hermon_fmrhdl_t fmrpool)2690 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2691 {
2692 /*
2693 * Force the unmapping of all entries on the dirty list, regardless of
2694 * whether the watermark has been hit yet.
2695 */
2696 /* grab the pool lock */
2697 mutex_enter(&fmrpool->fmr_lock);
2698 hermon_fmr_cleanup(fmrpool);
2699 mutex_exit(&fmrpool->fmr_lock);
2700 return (DDI_SUCCESS);
2701 }
2702
2703 /*
2704 * hermon_register_physical_fmr()
2705 * Map memory into FMR
2706 * Context: Can be called from interrupt or base context.
2707 */
2708 int
hermon_register_physical_fmr(hermon_state_t * state,hermon_fmrhdl_t fmrpool,ibt_pmr_attr_t * mem_pattr,hermon_mrhdl_t * mr,ibt_pmr_desc_t * mem_desc_p)2709 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2710 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2711 ibt_pmr_desc_t *mem_desc_p)
2712 {
2713 hermon_fmr_list_t *fmr;
2714 int status;
2715
2716 /* Check length */
2717 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2718 fmrpool->fmr_max_pages)) {
2719 return (IBT_MR_LEN_INVALID);
2720 }
2721
2722 mutex_enter(&fmrpool->fmr_lock);
2723 if (fmrpool->fmr_free_list == NULL) {
2724 if (hermon_fmr_verbose & 2)
2725 IBTF_DPRINTF_L2("fmr", "register needs remap");
2726 mutex_enter(&fmrpool->remap_lock);
2727 if (fmrpool->fmr_remap_list) {
2728 /* add to free list */
2729 *(fmrpool->fmr_free_list_tail) =
2730 fmrpool->fmr_remap_list;
2731 fmrpool->fmr_remap_list = NULL;
2732 fmrpool->fmr_free_list_tail =
2733 fmrpool->fmr_remap_list_tail;
2734
2735 /* reset list */
2736 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2737 fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2738 fmrpool->fmr_remap_len = 0;
2739 }
2740 mutex_exit(&fmrpool->remap_lock);
2741 }
2742 if (fmrpool->fmr_free_list == NULL) {
2743 if (hermon_fmr_verbose & 2)
2744 IBTF_DPRINTF_L2("fmr", "register needs cleanup");
2745 hermon_fmr_cleanup(fmrpool);
2746 }
2747
2748 /* grab next free entry */
2749 fmr = fmrpool->fmr_free_list;
2750 if (fmr == NULL) {
2751 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource");
2752 cmn_err(CE_CONT, "no free fmr resource\n");
2753 mutex_exit(&fmrpool->fmr_lock);
2754 return (IBT_INSUFF_RESOURCE);
2755 }
2756
2757 if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL)
2758 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2759 fmr->fmr_next = NULL;
2760 fmrpool->fmr_stat_register++;
2761 mutex_exit(&fmrpool->fmr_lock);
2762
2763 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2764 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
2765 mem_desc_p);
2766 if (status != DDI_SUCCESS) {
2767 return (status);
2768 }
2769 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2770 if (hermon_rdma_debug & 0x4)
2771 IBTF_DPRINTF_L2("fmr", " reg: mr %p key %x",
2772 fmr->fmr, fmr->fmr->mr_rkey);
2773 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2774 if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) {
2775 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2776 fmr->fmr_remaps = 0;
2777 }
2778
2779 fmr->fmr_remaps++;
2780
2781 *mr = (hermon_mrhdl_t)fmr->fmr;
2782
2783 return (DDI_SUCCESS);
2784 }
2785
2786 /*
2787 * hermon_deregister_fmr()
2788 * Unmap FMR
2789 * Context: Can be called from kernel context only.
2790 */
int
hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_fmrhdl_t	fmrpool;
	hermon_fmr_list_t	*fmr, **fmrlast;
	int		len;

	fmr = mr->mr_fmr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
	fmrpool = fmr->fmr_pool;

	/* mark as owned by software */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
	*(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0;

	/*
	 * While the entry still has remap budget left, park it on the
	 * remap list (reusable without a SYNC_TPT); once the budget is
	 * exhausted it must go on the dirty list instead, to be fully
	 * invalidated by hermon_fmr_cleanup().
	 */
	if (fmr->fmr_remaps <
	    state->hs_cfg_profile->cp_fmr_max_remaps) {
		/* add to remap list */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
		if (hermon_rdma_debug & 0x4)
			IBTF_DPRINTF_L2("fmr", "dereg: mr %p  key %x",
			    fmr->fmr, fmr->fmr->mr_rkey);
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
		mutex_enter(&fmrpool->remap_lock);
		/* append this entry at the remap list's tail */
		fmr->fmr_next = NULL;
		*(fmrpool->fmr_remap_list_tail) = fmr;
		fmrpool->fmr_remap_list_tail = &fmr->fmr_next;
		fmrpool->fmr_remap_len++;

		/* conditionally add remap list back to free list */
		fmrlast = NULL;
		if (fmrpool->fmr_remap_len >=
		    fmrpool->fmr_remap_watermark) {
			/*
			 * Watermark hit: detach the whole remap list
			 * (head in 'fmr', tail in 'fmrlast', count in
			 * 'len') so it can be spliced onto the free
			 * list after remap_lock is dropped.
			 */
			fmr = fmrpool->fmr_remap_list;
			fmrlast = fmrpool->fmr_remap_list_tail;
			len = fmrpool->fmr_remap_len;
			fmrpool->fmr_remap_len = 0;
			fmrpool->fmr_remap_list = NULL;
			fmrpool->fmr_remap_list_tail =
			    &fmrpool->fmr_remap_list;
		}
		mutex_exit(&fmrpool->remap_lock);
		/*
		 * remap_lock is released before fmr_lock is taken --
		 * presumably to avoid holding both locks at once; the
		 * detached batch is private to this thread at this point.
		 */
		if (fmrlast) {
			mutex_enter(&fmrpool->fmr_lock);
			*(fmrpool->fmr_free_list_tail) = fmr;
			fmrpool->fmr_free_list_tail = fmrlast;
			fmrpool->fmr_free_len += len;
			mutex_exit(&fmrpool->fmr_lock);
		}
	} else {
		/* add to dirty list */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
		if (hermon_rdma_debug & 0x4)
			IBTF_DPRINTF_L2("fmr", "dirty: mr %p  key %x",
			    fmr->fmr, fmr->fmr->mr_rkey);
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))

		mutex_enter(&fmrpool->dirty_lock);
		/* append this entry at the dirty list's tail */
		fmr->fmr_next = NULL;
		*(fmrpool->fmr_dirty_list_tail) = fmr;
		fmrpool->fmr_dirty_list_tail = &fmr->fmr_next;
		fmrpool->fmr_dirty_len++;

		/*
		 * Enough dirty entries have accumulated: drop dirty_lock
		 * first (cleanup re-acquires it internally), then run the
		 * SYNC_TPT cleanup under the pool lock.
		 */
		if (fmrpool->fmr_dirty_len >=
		    fmrpool->fmr_dirty_watermark) {
			mutex_exit(&fmrpool->dirty_lock);
			mutex_enter(&fmrpool->fmr_lock);
			hermon_fmr_cleanup(fmrpool);
			mutex_exit(&fmrpool->fmr_lock);
		} else
			mutex_exit(&fmrpool->dirty_lock);
	}
	return (DDI_SUCCESS);
}
2865
2866 /*
2867 * hermon_fmr_cleanup()
2868 * Context: Called from any context.
2869 */
static void
hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool)
{
	int		status;

	ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));

	/* nothing has been registered since the last cleanup; no work */
	if (fmrpool->fmr_stat_register == 0)
		return;

	fmrpool->fmr_stat_register = 0;
	/* order the counter reset ahead of the SYNC_TPT below */
	membar_producer();

	if (hermon_fmr_verbose)
		IBTF_DPRINTF_L2("fmr", "TPT_SYNC");
	/*
	 * Post a SYNC_TPT command to the HCA (non-sleeping spin wait).
	 * A failure is logged but not propagated -- the lists are still
	 * recycled below.  NOTE(review): confirm recycling on SYNC_TPT
	 * failure is intentional rather than an ignored error.
	 */
	status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_WARN, "fmr SYNC_TPT failed(%x)\n", status);
	}
	/*
	 * Bump the generation so entries handed out after this point get
	 * their per-entry remap counters reset (see register path).
	 */
	fmrpool->fmr_remap_gen++;

	/* add everything back to the free list */
	mutex_enter(&fmrpool->dirty_lock);
	if (fmrpool->fmr_dirty_list) {
		/* splice the whole dirty list onto the free list tail */
		*(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list;
		fmrpool->fmr_dirty_list = NULL;
		fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail;

		/* reset list */
		fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
		fmrpool->fmr_free_len += fmrpool->fmr_dirty_len;
		fmrpool->fmr_dirty_len = 0;
	}
	mutex_exit(&fmrpool->dirty_lock);

	mutex_enter(&fmrpool->remap_lock);
	if (fmrpool->fmr_remap_list) {
		/* splice the whole remap list onto the free list tail */
		*(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list;
		fmrpool->fmr_remap_list = NULL;
		fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail;

		/* reset list */
		fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
		fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
		fmrpool->fmr_remap_len = 0;
	}
	mutex_exit(&fmrpool->remap_lock);

	/* notify the pool's consumer, if it asked for flush callbacks */
	if (fmrpool->fmr_flush_function != NULL) {
		(void) fmrpool->fmr_flush_function(
		    (ibc_fmr_pool_hdl_t)fmrpool,
		    fmrpool->fmr_flush_arg);
	}
}
2927