1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * tavor_misc.c
29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection
30 * Domain, and port-related operations
31 *
32 * Implements all the routines necessary for allocating, freeing, querying
33 * and modifying Address Handles and Protection Domains. Also implements
34 * all the routines necessary for adding and removing Queue Pairs to/from
35 * Multicast Groups. Lastly, it implements the routines necessary for
36 * port-related query and modify operations.
37 */
38
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/modctl.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46
47 #include <sys/ib/adapters/tavor/tavor.h>
48
49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav,
50 uint_t flag);
51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
52 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found);
53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg,
54 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp);
55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp);
56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp);
57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state,
58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg,
60 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc);
61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
62 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry);
63 static int tavor_mcg_entry_invalidate(tavor_state_t *state,
64 tavor_hw_mcg_t *mcg_entry, uint_t indx);
65 static int tavor_mgid_is_valid(ib_gid_t gid);
66 static int tavor_mlid_is_valid(ib_lid_t lid);
67
68
69 /*
70 * tavor_ah_alloc()
71 * Context: Can be called only from user or kernel context.
72 */
73 int
tavor_ah_alloc(tavor_state_t * state,tavor_pdhdl_t pd,ibt_adds_vect_t * attr_p,tavor_ahhdl_t * ahhdl,uint_t sleepflag)74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd,
75 ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
76 {
77 tavor_rsrc_t *udav, *rsrc;
78 tavor_hw_udav_t udav_entry;
79 tavor_ahhdl_t ah;
80 ibt_mr_attr_t mr_attr;
81 tavor_mr_options_t op;
82 tavor_mrhdl_t mr;
83 uint64_t data;
84 uint32_t size;
85 int status, i, flag;
86 char *errormsg;
87
88 TAVOR_TNF_ENTER(tavor_ah_alloc);
89
90 /*
91 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
92 * indicate that we wish to allocate an "invalid" (i.e. empty)
93 * address handle XXX
94 */
95
96 /* Validate that specified port number is legal */
97 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
98 /* Set "status" and "errormsg" and goto failure */
99 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
100 goto ahalloc_fail;
101 }
102
103 /*
104 * Allocate a UDAV entry. This will be filled in with all the
105 * necessary parameters to define the Address Handle. Unlike the
106 * other hardware resources no ownership transfer takes place as
107 * these UDAV entries are always owned by hardware.
108 */
109 status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav);
110 if (status != DDI_SUCCESS) {
111 /* Set "status" and "errormsg" and goto failure */
112 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV");
113 goto ahalloc_fail;
114 }
115
116 /*
117 * Allocate the software structure for tracking the address handle
118 * (i.e. the Tavor Address Handle struct). If we fail here, we must
119 * undo the previous resource allocation.
120 */
121 status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc);
122 if (status != DDI_SUCCESS) {
123 /* Set "status" and "errormsg" and goto failure */
124 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler");
125 goto ahalloc_fail1;
126 }
127 ah = (tavor_ahhdl_t)rsrc->tr_addr;
128 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
129
130 /* Increment the reference count on the protection domain (PD) */
131 tavor_pd_refcnt_inc(pd);
132
133 /*
134 * Fill in the UDAV entry. Note: We are only filling in a temporary
135 * copy here, which we will later copy into the actual entry in
136 * Tavor DDR memory. This starts be zeroing out the temporary copy
137 * and then calling tavor_set_addr_path() to fill in the common
138 * portions that can be pulled from the "ibt_adds_vect_t" passed in
139 */
140 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
141 status = tavor_set_addr_path(state, attr_p,
142 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
143 if (status != DDI_SUCCESS) {
144 tavor_pd_refcnt_dec(pd);
145 tavor_rsrc_free(state, &rsrc);
146 tavor_rsrc_free(state, &udav);
147 /* Set "status" and "errormsg" and goto failure */
148 TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path");
149 goto ahalloc_fail;
150 }
151 udav_entry.pd = pd->pd_pdnum;
152 udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1;
153
154 /*
155 * Register the memory for the UDAV. The memory for the UDAV must
156 * be registered in the Tavor TPT tables. This gives us the LKey
157 * that we will need when we later post a UD work request that
158 * uses this address handle.
159 * We might be able to pre-register all the memory for the UDAV XXX
160 */
161 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
162 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr;
163 mr_attr.mr_len = udav->tr_len;
164 mr_attr.mr_as = NULL;
165 mr_attr.mr_flags = flag;
166 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
167 op.mro_bind_dmahdl = NULL;
168 op.mro_bind_override_addr = 0;
169 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
170 if (status != DDI_SUCCESS) {
171 /* Set "status" and "errormsg" and goto failure */
172 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
173 goto ahalloc_fail2;
174 }
175
176 /*
177 * Fill in the UDAV entry. Here we copy all the information from
178 * the temporary UDAV into the DDR memory for the real UDAV entry.
179 * Note that we copy everything but the first 64-bit word. This
180 * is where the PD number for the address handle resides.
181 * By filling everything except the PD and then writing the PD in
182 * a separate step below, we can ensure that the UDAV is not
183 * accessed while there are partially written values in it (something
184 * which really should not happen anyway). This is guaranteed
185 * because we take measures to ensure that the PD number is zero for
186 * all unused UDAV (and because PD#0 is reserved for Tavor).
187 */
188 size = sizeof (tavor_hw_udav_t) >> 3;
189 for (i = 1; i < size; i++) {
190 data = ((uint64_t *)&udav_entry)[i];
191 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
192 data);
193 }
194 data = ((uint64_t *)&udav_entry)[0];
195 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data);
196
197 /*
198 * Fill in the rest of the Tavor Address Handle struct. Having
199 * successfully copied the UDAV into the hardware, we update the
200 * following fields for use in further operations on the AH.
201 *
202 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
203 * here because we may need to return it later to the IBTF (as a
204 * result of a subsequent query operation). Unlike the other UDAV
205 * parameters, the value of "av_dgid.gid_guid" is not always preserved
206 * by being written to hardware. The reason for this is described in
207 * tavor_set_addr_path().
208 */
209 ah->ah_udavrsrcp = udav;
210 ah->ah_rsrcp = rsrc;
211 ah->ah_pdhdl = pd;
212 ah->ah_mrhdl = mr;
213 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
214 ah->ah_save_srate = attr_p->av_srate;
215 *ahhdl = ah;
216
217 /* Determine if later ddi_dma_sync will be necessary */
218 ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state);
219
220 /* Sync the UDAV for use by the hardware */
221 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
222
223 TAVOR_TNF_EXIT(tavor_ah_alloc);
224 return (DDI_SUCCESS);
225
226 ahalloc_fail2:
227 tavor_pd_refcnt_dec(pd);
228 tavor_rsrc_free(state, &rsrc);
229 ahalloc_fail1:
230 tavor_rsrc_free(state, &udav);
231 ahalloc_fail:
232 TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "",
233 tnf_string, msg, errormsg);
234 TAVOR_TNF_EXIT(tavor_ah_alloc);
235 return (status);
236 }
237
238
239 /*
240 * tavor_ah_free()
241 * Context: Can be called only from user or kernel context.
242 */
243 /* ARGSUSED */
244 int
tavor_ah_free(tavor_state_t * state,tavor_ahhdl_t * ahhdl,uint_t sleepflag)245 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
246 {
247 tavor_rsrc_t *udav, *rsrc;
248 tavor_pdhdl_t pd;
249 tavor_mrhdl_t mr;
250 tavor_ahhdl_t ah;
251 int status;
252
253 TAVOR_TNF_ENTER(tavor_ah_free);
254
255 /*
256 * Pull all the necessary information from the Tavor Address Handle
257 * struct. This is necessary here because the resource for the
258 * AH is going to be freed up as part of this operation.
259 */
260 ah = *ahhdl;
261 mutex_enter(&ah->ah_lock);
262 udav = ah->ah_udavrsrcp;
263 rsrc = ah->ah_rsrcp;
264 pd = ah->ah_pdhdl;
265 mr = ah->ah_mrhdl;
266 mutex_exit(&ah->ah_lock);
267 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
268
269 /*
270 * Deregister the memory for the UDAV. If this fails for any reason,
271 * then it is an indication that something (either in HW or SW) has
272 * gone seriously wrong. So we print a warning message and return
273 * failure.
274 */
275 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
276 sleepflag);
277 if (status != DDI_SUCCESS) {
278 TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
279 TAVOR_TNF_EXIT(tavor_ah_free);
280 return (ibc_get_ci_failure(0));
281 }
282
283 /*
284 * Write zero to the first 64-bit word in the UDAV entry. As
285 * described above (in tavor_ah_alloc), the PD number is stored in
286 * the first 64-bits of each UDAV and setting this to zero is
287 * guaranteed to invalidate the entry.
288 */
289 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0);
290
291 /* Sync the UDAV for use by the hardware */
292 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
293
294 /* Decrement the reference count on the protection domain (PD) */
295 tavor_pd_refcnt_dec(pd);
296
297 /* Free the Tavor Address Handle structure */
298 tavor_rsrc_free(state, &rsrc);
299
300 /* Free up the UDAV entry resource */
301 tavor_rsrc_free(state, &udav);
302
303 /* Set the ahhdl pointer to NULL and return success */
304 *ahhdl = NULL;
305
306 TAVOR_TNF_EXIT(tavor_ah_free);
307 return (DDI_SUCCESS);
308 }
309
310
311 /*
312 * tavor_ah_query()
313 * Context: Can be called from interrupt or base context.
314 */
315 /* ARGSUSED */
316 int
tavor_ah_query(tavor_state_t * state,tavor_ahhdl_t ah,tavor_pdhdl_t * pd,ibt_adds_vect_t * attr_p)317 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd,
318 ibt_adds_vect_t *attr_p)
319 {
320 tavor_hw_udav_t udav_entry;
321 tavor_rsrc_t *udav;
322 uint64_t data;
323 uint32_t size;
324 int i;
325
326 TAVOR_TNF_ENTER(tavor_ah_query);
327
328 mutex_enter(&ah->ah_lock);
329 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
330
331 /*
332 * Pull all the necessary information from the Tavor Address Handle
333 * structure
334 */
335 udav = ah->ah_udavrsrcp;
336 *pd = ah->ah_pdhdl;
337
338 /*
339 * Copy the UDAV entry into the temporary copy. Here we copy all
340 * the information from the UDAV entry in DDR memory into the
341 * temporary UDAV. Note: We don't need to sync the UDAV for
342 * reading by software because Tavor HW never modifies the entry.
343 */
344 size = sizeof (tavor_hw_udav_t) >> 3;
345 for (i = 0; i < size; i++) {
346 data = ddi_get64(udav->tr_acchdl,
347 ((uint64_t *)udav->tr_addr + i));
348 ((uint64_t *)&udav_entry)[i] = data;
349 }
350
351 /*
352 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill
353 * the common portions that can be pulled from the UDAV we pass in.
354 *
355 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
356 * "ah_save_guid" field we have previously saved away. The reason
357 * for this is described in tavor_ah_alloc() and tavor_ah_modify().
358 */
359 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry,
360 attr_p, TAVOR_ADDRPATH_UDAV, NULL);
361
362 attr_p->av_dgid.gid_guid = ah->ah_save_guid;
363 attr_p->av_srate = ah->ah_save_srate;
364
365 mutex_exit(&ah->ah_lock);
366 TAVOR_TNF_EXIT(tavor_ah_query);
367 return (DDI_SUCCESS);
368 }
369
370
371 /*
372 * tavor_ah_modify()
373 * Context: Can be called from interrupt or base context.
374 */
375 /* ARGSUSED */
376 int
tavor_ah_modify(tavor_state_t * state,tavor_ahhdl_t ah,ibt_adds_vect_t * attr_p)377 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah,
378 ibt_adds_vect_t *attr_p)
379 {
380 tavor_hw_udav_t udav_entry;
381 tavor_rsrc_t *udav;
382 uint64_t data_new, data_old;
383 uint32_t udav_pd, size, portnum_new;
384 int i, status;
385
386 TAVOR_TNF_ENTER(tavor_ah_modify);
387
388 /* Validate that specified port number is legal */
389 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
390 TNF_PROBE_1(tavor_ah_modify_inv_portnum,
391 TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num);
392 TAVOR_TNF_EXIT(tavor_ah_modify);
393 return (IBT_HCA_PORT_INVALID);
394 }
395
396 mutex_enter(&ah->ah_lock);
397
398 /*
399 * Pull all the necessary information from the Tavor Address Handle
400 * structure
401 */
402 udav = ah->ah_udavrsrcp;
403
404 /*
405 * Fill in the UDAV entry. Note: we are only filling in a temporary
406 * copy here, which we will later copy into the actual entry in
407 * Tavor DDR memory. This starts be zeroing out the temporary copy
408 * and then calling tavor_set_addr_path() to fill in the common
409 * portions that can be pulled from the "ibt_adds_vect_t" passed in
410 *
411 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
412 * field here (just as we did during tavor_ah_alloc()) because we
413 * may need to return it later to the IBTF (as a result of a
414 * subsequent query operation). As explained in tavor_ah_alloc(),
415 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
416 * is not always preserved by being written to hardware. The reason
417 * for this is described in tavor_set_addr_path().
418 */
419 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
420 status = tavor_set_addr_path(state, attr_p,
421 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
422 if (status != DDI_SUCCESS) {
423 mutex_exit(&ah->ah_lock);
424 TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail,
425 TAVOR_TNF_ERROR, "");
426 TAVOR_TNF_EXIT(tavor_ah_modify);
427 return (status);
428 }
429 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
430 ah->ah_save_srate = attr_p->av_srate;
431
432 /*
433 * Save away the current PD number for this UDAV. Then temporarily
434 * invalidate the entry (by setting the PD to zero). Note: Since
435 * the first 32 bits of the UDAV actually contain the current port
436 * number _and_ current PD number, we need to mask off some bits.
437 */
438 udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr);
439 udav_pd = udav_pd & 0xFFFFFF;
440 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0);
441
442 /* Sync the UDAV for use by the hardware */
443 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
444
445 /*
446 * Copy UDAV structure to the entry
447 * Note: We copy in 64-bit chunks. For the first two of these
448 * chunks it is necessary to read the current contents of the
449 * UDAV, mask off the modifiable portions (maintaining any
450 * of the "reserved" portions), and then mask on the new data.
451 */
452 size = sizeof (tavor_hw_udav_t) >> 3;
453 for (i = 0; i < size; i++) {
454 data_new = ((uint64_t *)&udav_entry)[i];
455 data_old = ddi_get64(udav->tr_acchdl,
456 ((uint64_t *)udav->tr_addr + i));
457
458 /*
459 * Apply mask to change only the relevant values. Note: We
460 * extract the new portnum from the address handle here
461 * because the "PD" and "portnum" fields are in the same
462 * 32-bit word in the UDAV. We will use the (new) port
463 * number extracted here when we write the valid PD number
464 * in the last step below.
465 */
466 if (i == 0) {
467 data_old = data_old & TAVOR_UDAV_MODIFY_MASK0;
468 portnum_new = data_new >> 56;
469 } else if (i == 1) {
470 data_old = data_old & TAVOR_UDAV_MODIFY_MASK1;
471 } else {
472 data_old = 0;
473 }
474
475 /* Write the updated values to the UDAV (in DDR) */
476 data_new = data_old | data_new;
477 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
478 data_new);
479 }
480
481 /*
482 * Sync the body of the UDAV for use by the hardware. After we
483 * have updated the PD number (to make the UDAV valid), we sync
484 * again to push the entire entry out for hardware access.
485 */
486 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
487
488 /*
489 * Put the valid PD number back into UDAV entry. Note: Because port
490 * number and PD number are in the same word, we must mask the
491 * new port number with the old PD number before writing it back
492 * to the UDAV entry
493 */
494 udav_pd = ((portnum_new << 24) | udav_pd);
495 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd);
496
497 /* Sync the rest of the UDAV for use by the hardware */
498 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
499
500 mutex_exit(&ah->ah_lock);
501 TAVOR_TNF_EXIT(tavor_ah_modify);
502 return (DDI_SUCCESS);
503 }
504
505
506 /*
507 * tavor_udav_sync()
508 * Context: Can be called from interrupt or base context.
509 */
510 /* ARGSUSED */
511 static void
tavor_udav_sync(tavor_ahhdl_t ah,tavor_hw_udav_t * udav,uint_t flag)512 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag)
513 {
514 ddi_dma_handle_t dmahdl;
515 off_t offset;
516 int status;
517
518 TAVOR_TNF_ENTER(tavor_udav_sync);
519
520 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
521
522 /* Determine if AH needs to be synced or not */
523 if (ah->ah_sync == 0) {
524 TAVOR_TNF_EXIT(tavor_udav_sync);
525 return;
526 }
527
528 /* Get the DMA handle from AH handle */
529 dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl;
530
531 /* Calculate offset into address handle */
532 offset = (off_t)0;
533 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag);
534 if (status != DDI_SUCCESS) {
535 TNF_PROBE_0(tavor_udav_sync_getnextentry_fail,
536 TAVOR_TNF_ERROR, "");
537 TAVOR_TNF_EXIT(tavor_udav_sync);
538 return;
539 }
540
541 TAVOR_TNF_EXIT(tavor_udav_sync);
542 }
543
544
545 /*
546 * tavor_mcg_attach()
547 * Context: Can be called only from user or kernel context.
548 */
549 int
tavor_mcg_attach(tavor_state_t * state,tavor_qphdl_t qp,ib_gid_t gid,ib_lid_t lid)550 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
551 ib_lid_t lid)
552 {
553 tavor_rsrc_t *rsrc;
554 tavor_hw_mcg_t *mcg_entry;
555 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
556 tavor_mcghdl_t mcg, newmcg;
557 uint64_t mgid_hash;
558 uint32_t end_indx;
559 int status;
560 uint_t qp_found;
561 char *errormsg;
562
563 TAVOR_TNF_ENTER(tavor_mcg_attach);
564
565 /*
566 * It is only allowed to attach MCG to UD queue pairs. Verify
567 * that the intended QP is of the appropriate transport type
568 */
569 if (qp->qp_serv_type != TAVOR_QP_UD) {
570 /* Set "status" and "errormsg" and goto failure */
571 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type");
572 goto mcgattach_fail;
573 }
574
575 /*
576 * Check for invalid Multicast DLID. Specifically, all Multicast
577 * LIDs should be within a well defined range. If the specified LID
578 * is outside of that range, then return an error.
579 */
580 if (tavor_mlid_is_valid(lid) == 0) {
581 /* Set "status" and "errormsg" and goto failure */
582 TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID");
583 goto mcgattach_fail;
584 }
585 /*
586 * Check for invalid Multicast GID. All Multicast GIDs should have
587 * a well-defined pattern of bits and flags that are allowable. If
588 * the specified GID does not meet the criteria, then return an error.
589 */
590 if (tavor_mgid_is_valid(gid) == 0) {
591 /* Set "status" and "errormsg" and goto failure */
592 TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID");
593 goto mcgattach_fail;
594 }
595
596 /*
597 * Compute the MGID hash value. Since the MCG table is arranged as
598 * a number of separate hash chains, this operation converts the
599 * specified MGID into the starting index of an entry in the hash
600 * table (i.e. the index for the start of the appropriate hash chain).
601 * Subsequent operations below will walk the chain searching for the
602 * right place to add this new QP.
603 */
604 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
605 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
606 if (status != TAVOR_CMD_SUCCESS) {
607 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
608 status);
609 TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail,
610 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
611 TAVOR_TNF_EXIT(tavor_mcg_attach);
612 return (ibc_get_ci_failure(0));
613 }
614
615 /*
616 * Grab the multicast group mutex. Then grab the pre-allocated
617 * temporary buffer used for holding and/or modifying MCG entries.
618 * Zero out the temporary MCG entry before we begin.
619 */
620 mutex_enter(&state->ts_mcglock);
621 mcg_entry = state->ts_mcgtmp;
622 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
623 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
624
625 /*
626 * Walk through the array of MCG entries starting at "mgid_hash".
627 * Try to find the appropriate place for this new QP to be added.
628 * This could happen when the first entry of the chain has MGID == 0
629 * (which means that the hash chain is empty), or because we find
630 * an entry with the same MGID (in which case we'll add the QP to
631 * that MCG), or because we come to the end of the chain (in which
632 * case this is the first QP being added to the multicast group that
633 * corresponds to the MGID). The tavor_mcg_walk_mgid_hash() routine
634 * walks the list and returns an index into the MCG table. The entry
635 * at this index is then checked to determine which case we have
636 * fallen into (see below). Note: We are using the "shadow" MCG
637 * list (of tavor_mcg_t structs) for this lookup because the real
638 * MCG entries are in hardware (and the lookup process would be much
639 * more time consuming).
640 */
641 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
642 mcg = &state->ts_mcghdl[end_indx];
643
644 /*
645 * If MGID == 0, then the hash chain is empty. Just fill in the
646 * current entry. Note: No need to allocate an MCG table entry
647 * as all the hash chain "heads" are already preallocated.
648 */
649 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
650
651 /* Fill in the current entry in the "shadow" MCG list */
652 tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
653
654 /*
655 * Try to add the new QP number to the list. This (and the
656 * above) routine fills in a temporary MCG. The "mcg_entry"
657 * and "mcg_entry_qplist" pointers simply point to different
658 * offsets within the same temporary copy of the MCG (for
659 * convenience). Note: If this fails, we need to invalidate
660 * the entries we've already put into the "shadow" list entry
661 * above.
662 */
663 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
664 &qp_found);
665 if (status != DDI_SUCCESS) {
666 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
667 mutex_exit(&state->ts_mcglock);
668 /* Set "status" and "errormsg" and goto failure */
669 TAVOR_TNF_FAIL(status, "failed qplist add");
670 goto mcgattach_fail;
671 }
672
673 /*
674 * Once the temporary MCG has been filled in, write the entry
675 * into the appropriate location in the Tavor MCG entry table.
676 * If it's successful, then drop the lock and return success.
677 * Note: In general, this operation shouldn't fail. If it
678 * does, then it is an indication that something (probably in
679 * HW, but maybe in SW) has gone seriously wrong. We still
680 * want to zero out the entries that we've filled in above
681 * (in the tavor_mcg_setup_new_hdr() routine).
682 */
683 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
684 TAVOR_CMD_NOSLEEP_SPIN);
685 if (status != TAVOR_CMD_SUCCESS) {
686 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
687 mutex_exit(&state->ts_mcglock);
688 TAVOR_WARNING(state, "failed to write MCG entry");
689 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
690 "%08x\n", status);
691 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
692 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
693 tnf_uint, indx, end_indx);
694 TAVOR_TNF_EXIT(tavor_mcg_attach);
695 return (ibc_get_ci_failure(0));
696 }
697
698 /*
699 * Now that we know all the Tavor firmware accesses have been
700 * successful, we update the "shadow" MCG entry by incrementing
701 * the "number of attached QPs" count.
702 *
703 * We increment only if the QP is not already part of the
704 * MCG by checking the 'qp_found' flag returned from the
705 * qplist_add above.
706 */
707 if (!qp_found) {
708 mcg->mcg_num_qps++;
709
710 /*
711 * Increment the refcnt for this QP. Because the QP
712 * was added to this MCG, the refcnt must be
713 * incremented.
714 */
715 tavor_qp_mcg_refcnt_inc(qp);
716 }
717
718 /*
719 * We drop the lock and return success.
720 */
721 mutex_exit(&state->ts_mcglock);
722 TAVOR_TNF_EXIT(tavor_mcg_attach);
723 return (DDI_SUCCESS);
724 }
725
726 /*
727 * If the specified MGID matches the MGID in the current entry, then
728 * we need to try to add the QP to the current MCG entry. In this
729 * case, it means that we need to read the existing MCG entry (into
730 * the temporary MCG), add the new QP number to the temporary entry
731 * (using the same method we used above), and write the entry back
732 * to the hardware (same as above).
733 */
734 if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
735 (mcg->mcg_mgid_l == gid.gid_guid)) {
736
737 /*
738 * Read the current MCG entry into the temporary MCG. Note:
739 * In general, this operation shouldn't fail. If it does,
740 * then it is an indication that something (probably in HW,
741 * but maybe in SW) has gone seriously wrong.
742 */
743 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
744 TAVOR_CMD_NOSLEEP_SPIN);
745 if (status != TAVOR_CMD_SUCCESS) {
746 mutex_exit(&state->ts_mcglock);
747 TAVOR_WARNING(state, "failed to read MCG entry");
748 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
749 "%08x\n", status);
750 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
751 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
752 tnf_uint, indx, end_indx);
753 TAVOR_TNF_EXIT(tavor_mcg_attach);
754 return (ibc_get_ci_failure(0));
755 }
756
757 /*
758 * Try to add the new QP number to the list. This routine
759 * fills in the necessary pieces of the temporary MCG. The
760 * "mcg_entry_qplist" pointer is used to point to the portion
761 * of the temporary MCG that holds the QP numbers.
762 *
763 * Note: tavor_mcg_qplist_add() returns SUCCESS if it
764 * already found the QP in the list. In this case, the QP is
765 * not added on to the list again. Check the flag 'qp_found'
766 * if this value is needed to be known.
767 *
768 */
769 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
770 &qp_found);
771 if (status != DDI_SUCCESS) {
772 mutex_exit(&state->ts_mcglock);
773 /* Set "status" and "errormsg" and goto failure */
774 TAVOR_TNF_FAIL(status, "failed qplist add");
775 goto mcgattach_fail;
776 }
777
778 /*
779 * Once the temporary MCG has been updated, write the entry
780 * into the appropriate location in the Tavor MCG entry table.
781 * If it's successful, then drop the lock and return success.
782 * Note: In general, this operation shouldn't fail. If it
783 * does, then it is an indication that something (probably in
784 * HW, but maybe in SW) has gone seriously wrong.
785 */
786 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
787 TAVOR_CMD_NOSLEEP_SPIN);
788 if (status != TAVOR_CMD_SUCCESS) {
789 mutex_exit(&state->ts_mcglock);
790 TAVOR_WARNING(state, "failed to write MCG entry");
791 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
792 "%08x\n", status);
793 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
794 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
795 tnf_uint, indx, end_indx);
796 TAVOR_TNF_EXIT(tavor_mcg_attach);
797 return (ibc_get_ci_failure(0));
798 }
799
800 /*
801 * Now that we know all the Tavor firmware accesses have been
802 * successful, we update the current "shadow" MCG entry by
803 * incrementing the "number of attached QPs" count.
804 *
805 * We increment only if the QP is not already part of the
806 * MCG by checking the 'qp_found' flag returned from the
807 * qplist_add above.
808 */
809 if (!qp_found) {
810 mcg->mcg_num_qps++;
811
812 /*
813 * Increment the refcnt for this QP. Because the QP
814 * was added to this MCG, the refcnt must be
815 * incremented.
816 */
817 tavor_qp_mcg_refcnt_inc(qp);
818 }
819
820 /*
821 * We drop the lock and return success.
822 */
823 mutex_exit(&state->ts_mcglock);
824 TAVOR_TNF_EXIT(tavor_mcg_attach);
825 return (DDI_SUCCESS);
826 }
827
828 /*
829 * If we've reached here, then we're at the end of the hash chain.
830 * We need to allocate a new MCG entry, fill it in, write it to Tavor,
831 * and update the previous entry to link the new one to the end of the
832 * chain.
833 */
834
835 /*
836 * Allocate an MCG table entry. This will be filled in with all
837 * the necessary parameters to define the multicast group. Then it
838 * will be written to the hardware in the next-to-last step below.
839 */
840 status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc);
841 if (status != DDI_SUCCESS) {
842 mutex_exit(&state->ts_mcglock);
843 /* Set "status" and "errormsg" and goto failure */
844 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG");
845 goto mcgattach_fail;
846 }
847
848 /*
849 * Fill in the new entry in the "shadow" MCG list. Note: Just as
850 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion
851 * of the temporary MCG entry (the rest of which will be filled in by
852 * tavor_mcg_qplist_add() below)
853 */
854 newmcg = &state->ts_mcghdl[rsrc->tr_indx];
855 tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
856
857 /*
858 * Try to add the new QP number to the list. This routine fills in
859 * the final necessary pieces of the temporary MCG. The
860 * "mcg_entry_qplist" pointer is used to point to the portion of the
861 * temporary MCG that holds the QP numbers. If we fail here, we
862 * must undo the previous resource allocation.
863 *
864 * Note: tavor_mcg_qplist_add() can return SUCCESS if it already
865 * found the QP in the list. In this case, the QP is not added on to
866 * the list again. Check the flag 'qp_found' if this value is needed
867 * to be known.
868 */
869 status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
870 &qp_found);
871 if (status != DDI_SUCCESS) {
872 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
873 tavor_rsrc_free(state, &rsrc);
874 mutex_exit(&state->ts_mcglock);
875 /* Set "status" and "errormsg" and goto failure */
876 TAVOR_TNF_FAIL(status, "failed qplist add");
877 goto mcgattach_fail;
878 }
879
880 /*
881 * Once the temporary MCG has been updated, write the entry into the
882 * appropriate location in the Tavor MCG entry table. If this is
883 * successful, then we need to chain the previous entry to this one.
884 * Note: In general, this operation shouldn't fail. If it does, then
885 * it is an indication that something (probably in HW, but maybe in
886 * SW) has gone seriously wrong.
887 */
888 status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx,
889 TAVOR_CMD_NOSLEEP_SPIN);
890 if (status != TAVOR_CMD_SUCCESS) {
891 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
892 tavor_rsrc_free(state, &rsrc);
893 mutex_exit(&state->ts_mcglock);
894 TAVOR_WARNING(state, "failed to write MCG entry");
895 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
896 status);
897 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
898 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
899 tnf_uint, indx, rsrc->tr_indx);
900 TAVOR_TNF_EXIT(tavor_mcg_attach);
901 return (ibc_get_ci_failure(0));
902 }
903
904 /*
905 * Now read the current MCG entry (the one previously at the end of
906 * hash chain) into the temporary MCG. We are going to update its
907 * "next_gid_indx" now and write the entry back to the MCG table.
908 * Note: In general, this operation shouldn't fail. If it does, then
909 * it is an indication that something (probably in HW, but maybe in SW)
910 * has gone seriously wrong. We will free up the MCG entry resource,
911 * but we will not undo the previously written MCG entry in the HW.
912 * This is OK, though, because the MCG entry is not currently attached
913 * to any hash chain.
914 */
915 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
916 TAVOR_CMD_NOSLEEP_SPIN);
917 if (status != TAVOR_CMD_SUCCESS) {
918 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
919 tavor_rsrc_free(state, &rsrc);
920 mutex_exit(&state->ts_mcglock);
921 TAVOR_WARNING(state, "failed to read MCG entry");
922 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
923 status);
924 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
925 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
926 tnf_uint, indx, end_indx);
927 TAVOR_TNF_EXIT(tavor_mcg_attach);
928 return (ibc_get_ci_failure(0));
929 }
930
931 /*
932 * Finally, we update the "next_gid_indx" field in the temporary MCG
933 * and attempt to write the entry back into the Tavor MCG table. If
934 * this succeeds, then we update the "shadow" list to reflect the
935 * change, drop the lock, and return success. Note: In general, this
936 * operation shouldn't fail. If it does, then it is an indication
937 * that something (probably in HW, but maybe in SW) has gone seriously
938 * wrong. Just as we do above, we will free up the MCG entry resource,
939 * but we will not try to undo the previously written MCG entry. This
940 * is OK, though, because (since we failed here to update the end of
941 * the chain) that other entry is not currently attached to any chain.
942 */
943 mcg_entry->next_gid_indx = rsrc->tr_indx;
944 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
945 TAVOR_CMD_NOSLEEP_SPIN);
946 if (status != TAVOR_CMD_SUCCESS) {
947 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
948 tavor_rsrc_free(state, &rsrc);
949 mutex_exit(&state->ts_mcglock);
950 TAVOR_WARNING(state, "failed to write MCG entry");
951 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
952 status);
953 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
954 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
955 tnf_uint, indx, end_indx);
956 TAVOR_TNF_EXIT(tavor_mcg_attach);
957 return (ibc_get_ci_failure(0));
958 }
959 mcg = &state->ts_mcghdl[end_indx];
960 mcg->mcg_next_indx = rsrc->tr_indx;
961
962 /*
963 * Now that we know all the Tavor firmware accesses have been
964 * successful, we update the new "shadow" MCG entry by incrementing
965 * the "number of attached QPs" count. Then we drop the lock and
966 * return success.
967 */
968 newmcg->mcg_num_qps++;
969
970 /*
971 * Increment the refcnt for this QP. Because the QP
972 * was added to this MCG, the refcnt must be
973 * incremented.
974 */
975 tavor_qp_mcg_refcnt_inc(qp);
976
977 mutex_exit(&state->ts_mcglock);
978 TAVOR_TNF_EXIT(tavor_mcg_attach);
979 return (DDI_SUCCESS);
980
981 mcgattach_fail:
982 TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string,
983 msg, errormsg);
984 TAVOR_TNF_EXIT(tavor_mcg_attach);
985 return (status);
986 }
987
988
989 /*
990 * tavor_mcg_detach()
991 * Context: Can be called only from user or kernel context.
992 */
993 int
tavor_mcg_detach(tavor_state_t * state,tavor_qphdl_t qp,ib_gid_t gid,ib_lid_t lid)994 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
995 ib_lid_t lid)
996 {
997 tavor_hw_mcg_t *mcg_entry;
998 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
999 tavor_mcghdl_t mcg;
1000 uint64_t mgid_hash;
1001 uint32_t end_indx, prev_indx;
1002 int status;
1003
1004 TAVOR_TNF_ENTER(tavor_mcg_detach);
1005
1006 /*
1007 * Check for invalid Multicast DLID. Specifically, all Multicast
1008 * LIDs should be within a well defined range. If the specified LID
1009 * is outside of that range, then return an error.
1010 */
1011 if (tavor_mlid_is_valid(lid) == 0) {
1012 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, "");
1013 TAVOR_TNF_EXIT(tavor_mcg_detach);
1014 return (IBT_MC_MLID_INVALID);
1015 }
1016
1017 /*
1018 * Compute the MGID hash value. As described above, the MCG table is
1019 * arranged as a number of separate hash chains. This operation
1020 * converts the specified MGID into the starting index of an entry in
1021 * the hash table (i.e. the index for the start of the appropriate
1022 * hash chain). Subsequent operations below will walk the chain
1023 * searching for a matching entry from which to attempt to remove
1024 * the specified QP.
1025 */
1026 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1027 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
1028 if (status != TAVOR_CMD_SUCCESS) {
1029 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
1030 status);
1031 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail,
1032 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1033 TAVOR_TNF_EXIT(tavor_mcg_attach);
1034 return (ibc_get_ci_failure(0));
1035 }
1036
1037 /*
1038 * Grab the multicast group mutex. Then grab the pre-allocated
1039 * temporary buffer used for holding and/or modifying MCG entries.
1040 */
1041 mutex_enter(&state->ts_mcglock);
1042 mcg_entry = state->ts_mcgtmp;
1043 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
1044
1045 /*
1046 * Walk through the array of MCG entries starting at "mgid_hash".
1047 * Try to find an MCG entry with a matching MGID. The
1048 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an
1049 * index into the MCG table. The entry at this index is checked to
1050 * determine whether it is a match or not. If it is a match, then
1051 * we continue on to attempt to remove the QP from the MCG. If it
1052 * is not a match (or not a valid MCG entry), then we return an error.
1053 */
1054 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1055 mcg = &state->ts_mcghdl[end_indx];
1056
1057 /*
1058 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1059 * does not match the MGID in the current entry, then return
1060 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1061 * valid).
1062 */
1063 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1064 ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1065 (mcg->mcg_mgid_l != gid.gid_guid))) {
1066 mutex_exit(&state->ts_mcglock);
1067 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, "");
1068 TAVOR_TNF_EXIT(tavor_mcg_detach);
1069 return (IBT_MC_MGID_INVALID);
1070 }
1071
1072 /*
1073 * Read the current MCG entry into the temporary MCG. Note: In
1074 * general, this operation shouldn't fail. If it does, then it is
1075 * an indication that something (probably in HW, but maybe in SW)
1076 * has gone seriously wrong.
1077 */
1078 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
1079 TAVOR_CMD_NOSLEEP_SPIN);
1080 if (status != TAVOR_CMD_SUCCESS) {
1081 mutex_exit(&state->ts_mcglock);
1082 TAVOR_WARNING(state, "failed to read MCG entry");
1083 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1084 status);
1085 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail,
1086 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1087 tnf_uint, indx, end_indx);
1088 TAVOR_TNF_EXIT(tavor_mcg_attach);
1089 return (ibc_get_ci_failure(0));
1090 }
1091
1092 /*
1093 * Search the QP number list for a match. If a match is found, then
1094 * remove the entry from the QP list. Otherwise, if no match is found,
1095 * return an error.
1096 */
1097 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1098 if (status != DDI_SUCCESS) {
1099 mutex_exit(&state->ts_mcglock);
1100 TAVOR_TNF_EXIT(tavor_mcg_detach);
1101 return (status);
1102 }
1103
1104 /*
1105 * Decrement the MCG count for this QP. When the 'qp_mcg'
1106 * field becomes 0, then this QP is no longer a member of any
1107 * MCG.
1108 */
1109 tavor_qp_mcg_refcnt_dec(qp);
1110
1111 /*
1112 * If the current MCG's QP number list is about to be made empty
1113 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1114 * chain. Otherwise, just write the updated MCG entry back to the
1115 * hardware. In either case, once we successfully update the hardware
1116 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1117 * count (or zero out the entire "shadow" list entry) before returning
1118 * success. Note: Zeroing out the "shadow" list entry is done
1119 * inside of tavor_mcg_hash_list_remove().
1120 */
1121 if (mcg->mcg_num_qps == 1) {
1122
1123 /* Remove an MCG entry from the hash chain */
1124 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx,
1125 mcg_entry);
1126 if (status != DDI_SUCCESS) {
1127 mutex_exit(&state->ts_mcglock);
1128 TAVOR_TNF_EXIT(tavor_mcg_detach);
1129 return (status);
1130 }
1131
1132 } else {
1133 /*
1134 * Write the updated MCG entry back to the Tavor MCG table.
1135 * If this succeeds, then we update the "shadow" list to
1136 * reflect the change (i.e. decrement the "mcg_num_qps"),
1137 * drop the lock, and return success. Note: In general,
1138 * this operation shouldn't fail. If it does, then it is an
1139 * indication that something (probably in HW, but maybe in SW)
1140 * has gone seriously wrong.
1141 */
1142 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
1143 TAVOR_CMD_NOSLEEP_SPIN);
1144 if (status != TAVOR_CMD_SUCCESS) {
1145 mutex_exit(&state->ts_mcglock);
1146 TAVOR_WARNING(state, "failed to write MCG entry");
1147 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1148 "%08x\n", status);
1149 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail,
1150 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1151 tnf_uint, indx, end_indx);
1152 TAVOR_TNF_EXIT(tavor_mcg_detach);
1153 return (ibc_get_ci_failure(0));
1154 }
1155 mcg->mcg_num_qps--;
1156 }
1157
1158 mutex_exit(&state->ts_mcglock);
1159 TAVOR_TNF_EXIT(tavor_mcg_detach);
1160 return (DDI_SUCCESS);
1161 }
1162
1163 /*
1164 * tavor_qp_mcg_refcnt_inc()
1165 * Context: Can be called from interrupt or base context.
1166 */
1167 static void
tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp)1168 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp)
1169 {
1170 /* Increment the QP's MCG reference count */
1171 mutex_enter(&qp->qp_lock);
1172 qp->qp_mcg_refcnt++;
1173 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "",
1174 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1175 mutex_exit(&qp->qp_lock);
1176 }
1177
1178
1179 /*
1180 * tavor_qp_mcg_refcnt_dec()
1181 * Context: Can be called from interrupt or base context.
1182 */
1183 static void
tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp)1184 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp)
1185 {
1186 /* Decrement the QP's MCG reference count */
1187 mutex_enter(&qp->qp_lock);
1188 qp->qp_mcg_refcnt--;
1189 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "",
1190 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1191 mutex_exit(&qp->qp_lock);
1192 }
1193
1194
1195 /*
1196 * tavor_mcg_qplist_add()
1197 * Context: Can be called from interrupt or base context.
1198 */
1199 static int
tavor_mcg_qplist_add(tavor_state_t * state,tavor_mcghdl_t mcg,tavor_hw_mcg_qp_list_t * mcg_qplist,tavor_qphdl_t qp,uint_t * qp_found)1200 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
1201 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp,
1202 uint_t *qp_found)
1203 {
1204 uint_t qplist_indx;
1205
1206 TAVOR_TNF_ENTER(tavor_mcg_qplist_add);
1207
1208 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1209
1210 qplist_indx = mcg->mcg_num_qps;
1211
1212 /*
1213 * Determine if we have exceeded the maximum number of QP per
1214 * multicast group. If we have, then return an error
1215 */
1216 if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) {
1217 TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps,
1218 TAVOR_TNF_ERROR, "");
1219 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1220 return (IBT_HCA_MCG_QP_EXCEEDED);
1221 }
1222
1223 /*
1224 * Determine if the QP is already attached to this MCG table. If it
1225 * is, then we break out and treat this operation as a NO-OP
1226 */
1227 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1228 qplist_indx++) {
1229 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1230 break;
1231 }
1232 }
1233
1234 /*
1235 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1236 * return SUCCESS in this case, but the qplist will not have been
1237 * updated because the QP was already on the list.
1238 */
1239 if (qplist_indx < mcg->mcg_num_qps) {
1240 *qp_found = 1;
1241 } else {
1242 /*
1243 * Otherwise, append the new QP number to the end of the
1244 * current QP list. Note: We will increment the "mcg_num_qps"
1245 * field on the "shadow" MCG list entry later (after we know
1246 * that all necessary Tavor firmware accesses have been
1247 * successful).
1248 *
1249 * Set 'qp_found' to 0 so we know the QP was added on to the
1250 * list for sure.
1251 */
1252 mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID;
1253 mcg_qplist[qplist_indx].qpn = qp->qp_qpnum;
1254 *qp_found = 0;
1255 }
1256
1257 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1258 return (DDI_SUCCESS);
1259 }
1260
1261
1262
1263 /*
1264 * tavor_mcg_qplist_remove()
1265 * Context: Can be called from interrupt or base context.
1266 */
1267 static int
tavor_mcg_qplist_remove(tavor_mcghdl_t mcg,tavor_hw_mcg_qp_list_t * mcg_qplist,tavor_qphdl_t qp)1268 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist,
1269 tavor_qphdl_t qp)
1270 {
1271 uint_t i, qplist_indx;
1272
1273 TAVOR_TNF_ENTER(tavor_mcg_qplist_remove);
1274
1275 /*
1276 * Search the MCG QP list for a matching QPN. When
1277 * it's found, we swap the last entry with the current
1278 * one, set the last entry to zero, decrement the last
1279 * entry, and return. If it's not found, then it's
1280 * and error.
1281 */
1282 qplist_indx = mcg->mcg_num_qps;
1283 for (i = 0; i < qplist_indx; i++) {
1284 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1285 mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1286 mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID;
1287 mcg_qplist[qplist_indx - 1].qpn = 0;
1288
1289 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1290 return (DDI_SUCCESS);
1291 }
1292 }
1293
1294 TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, "");
1295 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1296 return (IBT_QP_HDL_INVALID);
1297 }
1298
1299
1300 /*
1301 * tavor_mcg_walk_mgid_hash()
1302 * Context: Can be called from interrupt or base context.
1303 */
1304 static uint_t
tavor_mcg_walk_mgid_hash(tavor_state_t * state,uint64_t start_indx,ib_gid_t mgid,uint_t * p_indx)1305 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx,
1306 ib_gid_t mgid, uint_t *p_indx)
1307 {
1308 tavor_mcghdl_t curr_mcghdl;
1309 uint_t curr_indx, prev_indx;
1310
1311 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash);
1312
1313 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1314
1315 /* Start at the head of the hash chain */
1316 curr_indx = start_indx;
1317 prev_indx = curr_indx;
1318 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1319
1320 /* If the first entry in the chain has MGID == 0, then stop */
1321 if ((curr_mcghdl->mcg_mgid_h == 0) &&
1322 (curr_mcghdl->mcg_mgid_l == 0)) {
1323 goto end_mgid_hash_walk;
1324 }
1325
1326 /* If the first entry in the chain matches the MGID, then stop */
1327 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1328 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1329 goto end_mgid_hash_walk;
1330 }
1331
1332 /* Otherwise, walk the hash chain looking for a match */
1333 while (curr_mcghdl->mcg_next_indx != 0) {
1334 prev_indx = curr_indx;
1335 curr_indx = curr_mcghdl->mcg_next_indx;
1336 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1337
1338 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1339 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1340 break;
1341 }
1342 }
1343
1344 end_mgid_hash_walk:
1345 /*
1346 * If necessary, return the index of the previous entry too. This
1347 * is primarily used for detaching a QP from a multicast group. It
1348 * may be necessary, in that case, to delete an MCG entry from the
1349 * hash chain and having the index of the previous entry is helpful.
1350 */
1351 if (p_indx != NULL) {
1352 *p_indx = prev_indx;
1353 }
1354 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash);
1355 return (curr_indx);
1356 }
1357
1358
1359 /*
1360 * tavor_mcg_setup_new_hdr()
1361 * Context: Can be called from interrupt or base context.
1362 */
1363 static void
tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg,tavor_hw_mcg_t * mcg_hdr,ib_gid_t mgid,tavor_rsrc_t * mcg_rsrc)1364 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr,
1365 ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc)
1366 {
1367 TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr);
1368
1369 /*
1370 * Fill in the fields of the "shadow" entry used by software
1371 * to track MCG hardware entry
1372 */
1373 mcg->mcg_mgid_h = mgid.gid_prefix;
1374 mcg->mcg_mgid_l = mgid.gid_guid;
1375 mcg->mcg_rsrcp = mcg_rsrc;
1376 mcg->mcg_next_indx = 0;
1377 mcg->mcg_num_qps = 0;
1378
1379 /*
1380 * Fill the header fields of the MCG entry (in the temporary copy)
1381 */
1382 mcg_hdr->mgid_h = mgid.gid_prefix;
1383 mcg_hdr->mgid_l = mgid.gid_guid;
1384 mcg_hdr->next_gid_indx = 0;
1385
1386 TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr);
1387 }
1388
1389
1390 /*
1391 * tavor_mcg_hash_list_remove()
1392 * Context: Can be called only from user or kernel context.
1393 */
1394 static int
tavor_mcg_hash_list_remove(tavor_state_t * state,uint_t curr_indx,uint_t prev_indx,tavor_hw_mcg_t * mcg_entry)1395 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
1396 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry)
1397 {
1398 tavor_mcghdl_t curr_mcg, prev_mcg, next_mcg;
1399 uint_t next_indx;
1400 int status;
1401
1402 /* Get the pointer to "shadow" list for current entry */
1403 curr_mcg = &state->ts_mcghdl[curr_indx];
1404
1405 /*
1406 * If this is the first entry on a hash chain, then attempt to replace
1407 * the entry with the next entry on the chain. If there are no
1408 * subsequent entries on the chain, then this is the only entry and
1409 * should be invalidated.
1410 */
1411 if (curr_indx == prev_indx) {
1412
1413 /*
1414 * If this is the only entry on the chain, then invalidate it.
1415 * Note: Invalidating an MCG entry means writing all zeros
1416 * to the entry. This is only necessary for those MCG
1417 * entries that are the "head" entries of the individual hash
1418 * chains. Regardless of whether this operation returns
1419 * success or failure, return that result to the caller.
1420 */
1421 next_indx = curr_mcg->mcg_next_indx;
1422 if (next_indx == 0) {
1423 status = tavor_mcg_entry_invalidate(state, mcg_entry,
1424 curr_indx);
1425 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1426 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1427 return (status);
1428 }
1429
1430 /*
1431 * Otherwise, this is just the first entry on the chain, so
1432 * grab the next one
1433 */
1434 next_mcg = &state->ts_mcghdl[next_indx];
1435
1436 /*
1437 * Read the next MCG entry into the temporary MCG. Note:
1438 * In general, this operation shouldn't fail. If it does,
1439 * then it is an indication that something (probably in HW,
1440 * but maybe in SW) has gone seriously wrong.
1441 */
1442 status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx,
1443 TAVOR_CMD_NOSLEEP_SPIN);
1444 if (status != TAVOR_CMD_SUCCESS) {
1445 TAVOR_WARNING(state, "failed to read MCG entry");
1446 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
1447 "%08x\n", status);
1448 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1449 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1450 tnf_uint, indx, next_indx);
1451 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1452 return (ibc_get_ci_failure(0));
1453 }
1454
1455 /*
1456 * Copy/Write the temporary MCG back to the hardware MCG list
1457 * using the current index. This essentially removes the
1458 * current MCG entry from the list by writing over it with
1459 * the next one. If this is successful, then we can do the
1460 * same operation for the "shadow" list. And we can also
1461 * free up the Tavor MCG entry resource that was associated
1462 * with the (old) next entry. Note: In general, this
1463 * operation shouldn't fail. If it does, then it is an
1464 * indication that something (probably in HW, but maybe in SW)
1465 * has gone seriously wrong.
1466 */
1467 status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1468 TAVOR_CMD_NOSLEEP_SPIN);
1469 if (status != TAVOR_CMD_SUCCESS) {
1470 TAVOR_WARNING(state, "failed to write MCG entry");
1471 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1472 "%08x\n", status);
1473 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1474 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1475 tnf_uint, indx, curr_indx);
1476 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1477 return (ibc_get_ci_failure(0));
1478 }
1479
1480 /*
1481 * Copy all the software tracking information from the next
1482 * entry on the "shadow" MCG list into the current entry on
1483 * the list. Then invalidate (zero out) the other "shadow"
1484 * list entry.
1485 */
1486 bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1487 bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s));
1488
1489 /*
1490 * Free up the Tavor MCG entry resource used by the "next"
1491 * MCG entry. That resource is no longer needed by any
1492 * MCG entry which is first on a hash chain (like the "next"
1493 * entry has just become).
1494 */
1495 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1496
1497 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1498 return (DDI_SUCCESS);
1499 }
1500
1501 /*
1502 * Else if this is the last entry on the hash chain (or a middle
1503 * entry, then we update the previous entry's "next_gid_index" field
1504 * to make it point instead to the next entry on the chain. By
1505 * skipping over the removed entry in this way, we can then free up
1506 * any resources associated with the current entry. Note: We don't
1507 * need to invalidate the "skipped over" hardware entry because it
1508 * will no be longer connected to any hash chains, and if/when it is
1509 * finally re-used, it will be written with entirely new values.
1510 */
1511
1512 /*
1513 * Read the next MCG entry into the temporary MCG. Note: In general,
1514 * this operation shouldn't fail. If it does, then it is an
1515 * indication that something (probably in HW, but maybe in SW) has
1516 * gone seriously wrong.
1517 */
1518 status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1519 TAVOR_CMD_NOSLEEP_SPIN);
1520 if (status != TAVOR_CMD_SUCCESS) {
1521 TAVOR_WARNING(state, "failed to read MCG entry");
1522 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1523 status);
1524 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1525 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1526 tnf_uint, indx, prev_indx);
1527 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1528 return (ibc_get_ci_failure(0));
1529 }
1530
1531 /*
1532 * Finally, we update the "next_gid_indx" field in the temporary MCG
1533 * and attempt to write the entry back into the Tavor MCG table. If
1534 * this succeeds, then we update the "shadow" list to reflect the
1535 * change, free up the Tavor MCG entry resource that was associated
1536 * with the current entry, and return success. Note: In general,
1537 * this operation shouldn't fail. If it does, then it is an indication
1538 * that something (probably in HW, but maybe in SW) has gone seriously
1539 * wrong.
1540 */
1541 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1542 status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1543 TAVOR_CMD_NOSLEEP_SPIN);
1544 if (status != TAVOR_CMD_SUCCESS) {
1545 TAVOR_WARNING(state, "failed to write MCG entry");
1546 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1547 status);
1548 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1549 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1550 tnf_uint, indx, prev_indx);
1551 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1552 return (ibc_get_ci_failure(0));
1553 }
1554
1555 /*
1556 * Get the pointer to the "shadow" MCG list entry for the previous
1557 * MCG. Update its "mcg_next_indx" to point to the next entry
1558 * the one after the current entry. Note: This next index may be
1559 * zero, indicating the end of the list.
1560 */
1561 prev_mcg = &state->ts_mcghdl[prev_indx];
1562 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1563
1564 /*
1565 * Free up the Tavor MCG entry resource used by the current entry.
1566 * This resource is no longer needed because the chain now skips over
1567 * the current entry. Then invalidate (zero out) the current "shadow"
1568 * list entry.
1569 */
1570 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1571 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1572
1573 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1574 return (DDI_SUCCESS);
1575 }
1576
1577
1578 /*
1579 * tavor_mcg_entry_invalidate()
1580 * Context: Can be called only from user or kernel context.
1581 */
1582 static int
tavor_mcg_entry_invalidate(tavor_state_t * state,tavor_hw_mcg_t * mcg_entry,uint_t indx)1583 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry,
1584 uint_t indx)
1585 {
1586 int status;
1587
1588 TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate);
1589
1590 /*
1591 * Invalidate the hardware MCG entry by zeroing out this temporary
1592 * MCG and writing it the the hardware. Note: In general, this
1593 * operation shouldn't fail. If it does, then it is an indication
1594 * that something (probably in HW, but maybe in SW) has gone seriously
1595 * wrong.
1596 */
1597 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
1598 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx,
1599 TAVOR_CMD_NOSLEEP_SPIN);
1600 if (status != TAVOR_CMD_SUCCESS) {
1601 TAVOR_WARNING(state, "failed to write MCG entry");
1602 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1603 status);
1604 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail,
1605 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1606 tnf_uint, indx, indx);
1607 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1608 return (ibc_get_ci_failure(0));
1609 }
1610
1611 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1612 return (DDI_SUCCESS);
1613 }
1614
1615
1616 /*
1617 * tavor_mgid_is_valid()
1618 * Context: Can be called from interrupt or base context.
1619 */
1620 static int
tavor_mgid_is_valid(ib_gid_t gid)1621 tavor_mgid_is_valid(ib_gid_t gid)
1622 {
1623 uint_t topbits, flags, scope;
1624
1625 TAVOR_TNF_ENTER(tavor_mgid_is_valid);
1626
1627 /*
1628 * According to IBA 1.1 specification (section 4.1.1) a valid
1629 * "multicast GID" must have its top eight bits set to all ones
1630 */
1631 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) &
1632 TAVOR_MCG_TOPBITS_MASK;
1633 if (topbits != TAVOR_MCG_TOPBITS) {
1634 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR,
1635 "");
1636 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1637 return (0);
1638 }
1639
1640 /*
1641 * The next 4 bits are the "flag" bits. These are valid only
1642 * if they are "0" (which correspond to permanently assigned/
1643 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1644 * multicast GIDs). All other values are reserved.
1645 */
1646 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) &
1647 TAVOR_MCG_FLAGS_MASK;
1648 if (!((flags == TAVOR_MCG_FLAGS_PERM) ||
1649 (flags == TAVOR_MCG_FLAGS_NONPERM))) {
1650 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR,
1651 "", tnf_uint, flags, flags);
1652 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1653 return (0);
1654 }
1655
1656 /*
1657 * The next 4 bits are the "scope" bits. These are valid only
1658 * if they are "2" (Link-local), "5" (Site-local), "8"
1659 * (Organization-local) or "E" (Global). All other values
1660 * are reserved (or currently unassigned).
1661 */
1662 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) &
1663 TAVOR_MCG_SCOPE_MASK;
1664 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) ||
1665 (scope == TAVOR_MCG_SCOPE_SITELOC) ||
1666 (scope == TAVOR_MCG_SCOPE_ORGLOC) ||
1667 (scope == TAVOR_MCG_SCOPE_GLOBAL))) {
1668 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR,
1669 "", tnf_uint, scope, scope);
1670 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1671 return (0);
1672 }
1673
1674 /*
1675 * If it passes all of the above checks, then we will consider it
1676 * a valid multicast GID.
1677 */
1678 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1679 return (1);
1680 }
1681
1682
1683 /*
1684 * tavor_mlid_is_valid()
1685 * Context: Can be called from interrupt or base context.
1686 */
1687 static int
tavor_mlid_is_valid(ib_lid_t lid)1688 tavor_mlid_is_valid(ib_lid_t lid)
1689 {
1690 TAVOR_TNF_ENTER(tavor_mlid_is_valid);
1691
1692 /*
1693 * According to IBA 1.1 specification (section 4.1.1) a valid
1694 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1695 */
1696 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1697 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR,
1698 "", tnf_uint, mlid, lid);
1699 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1700 return (0);
1701 }
1702
1703 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1704 return (1);
1705 }
1706
1707
1708 /*
1709 * tavor_pd_alloc()
1710 * Context: Can be called only from user or kernel context.
1711 */
1712 int
tavor_pd_alloc(tavor_state_t * state,tavor_pdhdl_t * pdhdl,uint_t sleepflag)1713 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag)
1714 {
1715 tavor_rsrc_t *rsrc;
1716 tavor_pdhdl_t pd;
1717 int status;
1718
1719 TAVOR_TNF_ENTER(tavor_pd_alloc);
1720
1721 /*
1722 * Allocate the software structure for tracking the protection domain
1723 * (i.e. the Tavor Protection Domain handle). By default each PD
1724 * structure will have a unique PD number assigned to it. All that
1725 * is necessary is for software to initialize the PD reference count
1726 * (to zero) and return success.
1727 */
1728 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc);
1729 if (status != DDI_SUCCESS) {
1730 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, "");
1731 TAVOR_TNF_EXIT(tavor_pd_alloc);
1732 return (IBT_INSUFF_RESOURCE);
1733 }
1734 pd = (tavor_pdhdl_t)rsrc->tr_addr;
1735 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1736
1737 pd->pd_refcnt = 0;
1738 *pdhdl = pd;
1739
1740 TAVOR_TNF_EXIT(tavor_pd_alloc);
1741 return (DDI_SUCCESS);
1742 }
1743
1744
1745 /*
1746 * tavor_pd_free()
1747 * Context: Can be called only from user or kernel context.
1748 */
1749 int
tavor_pd_free(tavor_state_t * state,tavor_pdhdl_t * pdhdl)1750 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl)
1751 {
1752 tavor_rsrc_t *rsrc;
1753 tavor_pdhdl_t pd;
1754
1755 TAVOR_TNF_ENTER(tavor_pd_free);
1756
1757 /*
1758 * Pull all the necessary information from the Tavor Protection Domain
1759 * handle. This is necessary here because the resource for the
1760 * PD is going to be freed up as part of this operation.
1761 */
1762 pd = *pdhdl;
1763 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1764 rsrc = pd->pd_rsrcp;
1765
1766 /*
1767 * Check the PD reference count. If the reference count is non-zero,
1768 * then it means that this protection domain is still referenced by
1769 * some memory region, queue pair, address handle, or other IB object
1770 * If it is non-zero, then return an error. Otherwise, free the
1771 * Tavor resource and return success.
1772 */
1773 if (pd->pd_refcnt != 0) {
1774 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "",
1775 tnf_int, refcnt, pd->pd_refcnt);
1776 TAVOR_TNF_EXIT(tavor_pd_free);
1777 return (IBT_PD_IN_USE);
1778 }
1779
1780 /* Free the Tavor Protection Domain handle */
1781 tavor_rsrc_free(state, &rsrc);
1782
1783 /* Set the pdhdl pointer to NULL and return success */
1784 *pdhdl = (tavor_pdhdl_t)NULL;
1785
1786 TAVOR_TNF_EXIT(tavor_pd_free);
1787 return (DDI_SUCCESS);
1788 }
1789
1790
1791 /*
1792 * tavor_pd_refcnt_inc()
1793 * Context: Can be called from interrupt or base context.
1794 */
1795 void
tavor_pd_refcnt_inc(tavor_pdhdl_t pd)1796 tavor_pd_refcnt_inc(tavor_pdhdl_t pd)
1797 {
1798 /* Increment the protection domain's reference count */
1799 mutex_enter(&pd->pd_lock);
1800 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "",
1801 tnf_uint, refcnt, pd->pd_refcnt);
1802 pd->pd_refcnt++;
1803 mutex_exit(&pd->pd_lock);
1804
1805 }
1806
1807
1808 /*
1809 * tavor_pd_refcnt_dec()
1810 * Context: Can be called from interrupt or base context.
1811 */
1812 void
tavor_pd_refcnt_dec(tavor_pdhdl_t pd)1813 tavor_pd_refcnt_dec(tavor_pdhdl_t pd)
1814 {
1815 /* Decrement the protection domain's reference count */
1816 mutex_enter(&pd->pd_lock);
1817 pd->pd_refcnt--;
1818 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "",
1819 tnf_uint, refcnt, pd->pd_refcnt);
1820 mutex_exit(&pd->pd_lock);
1821
1822 }
1823
1824
1825 /*
1826 * tavor_port_query()
1827 * Context: Can be called only from user or kernel context.
1828 */
1829 int
tavor_port_query(tavor_state_t * state,uint_t port,ibt_hca_portinfo_t * pi)1830 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1831 {
1832 sm_portinfo_t portinfo;
1833 sm_guidinfo_t guidinfo;
1834 sm_pkey_table_t pkeytable;
1835 ib_gid_t *sgid;
1836 uint_t sgid_max, pkey_max, tbl_size;
1837 int i, j, indx, status;
1838
1839 TAVOR_TNF_ENTER(tavor_port_query);
1840
1841 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1842
1843 /* Validate that specified port number is legal */
1844 if (!tavor_portnum_is_valid(state, port)) {
1845 TNF_PROBE_1(tavor_port_query_inv_portnum_fail,
1846 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
1847 TAVOR_TNF_EXIT(tavor_port_query);
1848 return (IBT_HCA_PORT_INVALID);
1849 }
1850
1851 /*
1852 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD
1853 * to the firmware (for the specified port number). This returns
1854 * a full PortInfo MAD (in "portinfo") which we subsequently
1855 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1856 * to the IBTF.
1857 */
1858 status = tavor_getportinfo_cmd_post(state, port,
1859 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1860 if (status != TAVOR_CMD_SUCCESS) {
1861 cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command "
1862 "failed: %08x\n", port, status);
1863 TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail,
1864 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1865 TAVOR_TNF_EXIT(tavor_port_query);
1866 return (ibc_get_ci_failure(0));
1867 }
1868
1869 /*
1870 * Parse the PortInfo MAD and fill in the IBTF structure
1871 */
1872 pi->p_base_lid = portinfo.LID;
1873 pi->p_qkey_violations = portinfo.Q_KeyViolations;
1874 pi->p_pkey_violations = portinfo.P_KeyViolations;
1875 pi->p_sm_sl = portinfo.MasterSMSL;
1876 pi->p_sm_lid = portinfo.MasterSMLID;
1877 pi->p_linkstate = portinfo.PortState;
1878 pi->p_port_num = portinfo.LocalPortNum;
1879 pi->p_phys_state = portinfo.PortPhysicalState;
1880 pi->p_width_supported = portinfo.LinkWidthSupported;
1881 pi->p_width_enabled = portinfo.LinkWidthEnabled;
1882 pi->p_width_active = portinfo.LinkWidthActive;
1883 pi->p_speed_supported = portinfo.LinkSpeedSupported;
1884 pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
1885 pi->p_speed_active = portinfo.LinkSpeedActive;
1886 pi->p_mtu = portinfo.MTUCap;
1887 pi->p_lmc = portinfo.LMC;
1888 pi->p_max_vl = portinfo.VLCap;
1889 pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1890 pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ);
1891 tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl;
1892 pi->p_sgid_tbl_sz = (1 << tbl_size);
1893 tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl;
1894 pi->p_pkey_tbl_sz = (1 << tbl_size);
1895
1896 /*
1897 * Convert InfiniBand-defined port capability flags to the format
1898 * specified by the IBTF
1899 */
1900 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1901 pi->p_capabilities |= IBT_PORT_CAP_SM;
1902 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1903 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1904 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1905 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1906 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1907 pi->p_capabilities |= IBT_PORT_CAP_DM;
1908 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1909 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1910
1911 /*
1912 * Fill in the SGID table. Since the only access to the Tavor
1913 * GID tables is through the firmware's MAD_IFC interface, we
1914 * post as many GetGUIDInfo MADs as necessary to read in the entire
1915 * contents of the SGID table (for the specified port). Note: The
1916 * GetGUIDInfo command only gets eight GUIDs per operation. These
1917 * GUIDs are then appended to the GID prefix for the port (from the
1918 * GetPortInfo above) to form the entire SGID table.
1919 */
1920 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1921 status = tavor_getguidinfo_cmd_post(state, port, i >> 3,
1922 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1923 if (status != TAVOR_CMD_SUCCESS) {
1924 cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) "
1925 "command failed: %08x\n", port, status);
1926 TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail,
1927 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1928 TAVOR_TNF_EXIT(tavor_port_query);
1929 return (ibc_get_ci_failure(0));
1930 }
1931
1932 /* Figure out how many of the entries are valid */
1933 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1934 for (j = 0; j < sgid_max; j++) {
1935 indx = (i + j);
1936 sgid = &pi->p_sgid_tbl[indx];
1937 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
1938 sgid->gid_prefix = portinfo.GidPrefix;
1939 sgid->gid_guid = guidinfo.GUIDBlocks[j];
1940 }
1941 }
1942
1943 /*
1944 * Fill in the PKey table. Just as for the GID tables above, the
1945 * only access to the Tavor PKey tables is through the firmware's
1946 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary
1947 * to read in the entire contents of the PKey table (for the specified
1948 * port). Note: The GetPKeyTable command only gets 32 PKeys per
1949 * operation.
1950 */
1951 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
1952 status = tavor_getpkeytable_cmd_post(state, port, i,
1953 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
1954 if (status != TAVOR_CMD_SUCCESS) {
1955 cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) "
1956 "command failed: %08x\n", port, status);
1957 TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail,
1958 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1959 TAVOR_TNF_EXIT(tavor_port_query);
1960 return (ibc_get_ci_failure(0));
1961 }
1962
1963 /* Figure out how many of the entries are valid */
1964 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
1965 for (j = 0; j < pkey_max; j++) {
1966 indx = (i + j);
1967 pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j];
1968 }
1969 }
1970
1971 TAVOR_TNF_EXIT(tavor_port_query);
1972 return (DDI_SUCCESS);
1973 }
1974
1975
1976 /*
1977 * tavor_port_modify()
1978 * Context: Can be called only from user or kernel context.
1979 */
1980 /* ARGSUSED */
1981 int
tavor_port_modify(tavor_state_t * state,uint8_t port,ibt_port_modify_flags_t flags,uint8_t init_type)1982 tavor_port_modify(tavor_state_t *state, uint8_t port,
1983 ibt_port_modify_flags_t flags, uint8_t init_type)
1984 {
1985 sm_portinfo_t portinfo;
1986 uint32_t capmask, reset_qkey;
1987 int status;
1988
1989 TAVOR_TNF_ENTER(tavor_port_modify);
1990
1991 /*
1992 * Return an error if either of the unsupported flags are set
1993 */
1994 if ((flags & IBT_PORT_SHUTDOWN) ||
1995 (flags & IBT_PORT_SET_INIT_TYPE)) {
1996 TNF_PROBE_1(tavor_port_modify_inv_flags_fail,
1997 TAVOR_TNF_ERROR, "", tnf_uint, flags, flags);
1998 TAVOR_TNF_EXIT(tavor_port_modify);
1999 return (IBT_NOT_SUPPORTED);
2000 }
2001
2002 /*
2003 * Determine whether we are trying to reset the QKey counter
2004 */
2005 reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0;
2006
2007 /* Validate that specified port number is legal */
2008 if (!tavor_portnum_is_valid(state, port)) {
2009 TNF_PROBE_1(tavor_port_modify_inv_portnum_fail,
2010 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
2011 TAVOR_TNF_EXIT(tavor_port_modify);
2012 return (IBT_HCA_PORT_INVALID);
2013 }
2014
2015 /*
2016 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the
2017 * firmware (for the specified port number). This returns a full
2018 * PortInfo MAD (in "portinfo") from which we pull the current
2019 * capability mask. We then modify the capability mask as directed
2020 * by the "pmod_flags" field, and write the updated capability mask
2021 * using the Tavor SET_IB command (below).
2022 */
2023 status = tavor_getportinfo_cmd_post(state, port,
2024 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2025 if (status != TAVOR_CMD_SUCCESS) {
2026 TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail,
2027 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2028 TAVOR_TNF_EXIT(tavor_port_modify);
2029 return (ibc_get_ci_failure(0));
2030 }
2031
2032 /*
2033 * Convert InfiniBand-defined port capability flags to the format
2034 * specified by the IBTF. Specifically, we modify the capability
2035 * mask based on the specified values.
2036 */
2037 capmask = portinfo.CapabilityMask;
2038
2039 if (flags & IBT_PORT_RESET_SM)
2040 capmask &= ~SM_CAP_MASK_IS_SM;
2041 else if (flags & IBT_PORT_SET_SM)
2042 capmask |= SM_CAP_MASK_IS_SM;
2043
2044 if (flags & IBT_PORT_RESET_SNMP)
2045 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2046 else if (flags & IBT_PORT_SET_SNMP)
2047 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2048
2049 if (flags & IBT_PORT_RESET_DEVMGT)
2050 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2051 else if (flags & IBT_PORT_SET_DEVMGT)
2052 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2053
2054 if (flags & IBT_PORT_RESET_VENDOR)
2055 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2056 else if (flags & IBT_PORT_SET_VENDOR)
2057 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2058
2059 /*
2060 * Use the Tavor SET_IB command to update the capability mask and
2061 * (possibly) reset the QKey violation counter for the specified port.
2062 * Note: In general, this operation shouldn't fail. If it does, then
2063 * it is an indication that something (probably in HW, but maybe in
2064 * SW) has gone seriously wrong.
2065 */
2066 status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey,
2067 TAVOR_SLEEPFLAG_FOR_CONTEXT());
2068 if (status != TAVOR_CMD_SUCCESS) {
2069 TAVOR_WARNING(state, "failed to modify port capabilities");
2070 cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: "
2071 "%08x\n", port, status);
2072 TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail,
2073 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2074 TAVOR_TNF_EXIT(tavor_port_modify);
2075 return (ibc_get_ci_failure(0));
2076 }
2077
2078 TAVOR_TNF_EXIT(tavor_port_modify);
2079 return (DDI_SUCCESS);
2080 }
2081
2082
2083 /*
2084 * tavor_set_addr_path()
2085 * Context: Can be called from interrupt or base context.
2086 *
2087 * Note: This routine is used for two purposes. It is used to fill in the
2088 * Tavor UDAV fields, and it is used to fill in the address path information
2089 * for QPs. Because the two Tavor structures are similar, common fields can
2090 * be filled in here. Because they are slightly different, however, we pass
2091 * an additional flag to indicate which type is being filled.
2092 */
2093 int
tavor_set_addr_path(tavor_state_t * state,ibt_adds_vect_t * av,tavor_hw_addr_path_t * path,uint_t type,tavor_qphdl_t qp)2094 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av,
2095 tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp)
2096 {
2097 uint_t gidtbl_sz;
2098
2099 TAVOR_TNF_ENTER(tavor_set_addr_path);
2100
2101 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2102 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2103
2104 path->ml_path = av->av_src_path;
2105 path->rlid = av->av_dlid;
2106 path->sl = av->av_srvl;
2107
2108 /* Port number only valid (in "av_port_num") if this is a UDAV */
2109 if (type == TAVOR_ADDRPATH_UDAV) {
2110 path->portnum = av->av_port_num;
2111 }
2112
2113 /*
2114 * Validate (and fill in) static rate.
2115 *
2116 * The stat_rate_sup is used to decide how to set the rate and
2117 * if it is zero, the driver uses the old interface.
2118 */
2119 if (state->ts_devlim.stat_rate_sup) {
2120 if (av->av_srate == IBT_SRATE_20) {
2121 path->max_stat_rate = 0; /* 4x@DDR injection rate */
2122 } else if (av->av_srate == IBT_SRATE_5) {
2123 path->max_stat_rate = 3; /* 1x@DDR injection rate */
2124 } else if (av->av_srate == IBT_SRATE_10) {
2125 path->max_stat_rate = 2; /* 4x@SDR injection rate */
2126 } else if (av->av_srate == IBT_SRATE_2) {
2127 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2128 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2129 path->max_stat_rate = 0; /* Max */
2130 } else {
2131 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2132 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2133 TAVOR_TNF_EXIT(tavor_set_addr_path);
2134 return (IBT_STATIC_RATE_INVALID);
2135 }
2136 } else {
2137 if (av->av_srate == IBT_SRATE_10) {
2138 path->max_stat_rate = 0; /* 4x@SDR injection rate */
2139 } else if (av->av_srate == IBT_SRATE_2) {
2140 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2141 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2142 path->max_stat_rate = 0; /* Max */
2143 } else {
2144 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2145 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2146 TAVOR_TNF_EXIT(tavor_set_addr_path);
2147 return (IBT_STATIC_RATE_INVALID);
2148 }
2149 }
2150
2151 /*
2152 * If this is a QP operation save asoft copy.
2153 */
2154 if (qp) {
2155 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2156 qp->qp_save_srate = av->av_srate;
2157 }
2158
2159 /* If "grh" flag is set, then check for valid SGID index too */
2160 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2161 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2162 TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail,
2163 TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix);
2164 TAVOR_TNF_EXIT(tavor_set_addr_path);
2165 return (IBT_SGID_INVALID);
2166 }
2167
2168 /*
2169 * Fill in all "global" values regardless of the value in the GRH
2170 * flag. Because "grh" is not set unless "av_send_grh" is set, the
2171 * hardware will ignore the other "global" values as necessary. Note:
2172 * SW does this here to enable later query operations to return
2173 * exactly the same params that were passed when the addr path was
2174 * last written.
2175 */
2176 path->grh = av->av_send_grh;
2177 if (type == TAVOR_ADDRPATH_QP) {
2178 path->mgid_index = av->av_sgid_ix;
2179 } else {
2180 /*
2181 * For Tavor UDAV, the "mgid_index" field is the index into
2182 * a combined table (not a per-port table). So some extra
2183 * calculations are necessary.
2184 */
2185 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2186 av->av_sgid_ix;
2187 }
2188 path->flow_label = av->av_flow;
2189 path->tclass = av->av_tclass;
2190 path->hop_limit = av->av_hop;
2191 path->rgid_h = av->av_dgid.gid_prefix;
2192
2193 /*
2194 * According to Tavor PRM, the (31:0) part of rgid_l must be set to
2195 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we
2196 * only need to do it for UDAV's. So we enforce that here.
2197 *
2198 * NOTE: The entire 64 bits worth of GUID info is actually being
2199 * preserved (for UDAVs) by the callers of this function
2200 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the
2201 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2202 * "don't care".
2203 */
2204 if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) {
2205 path->rgid_l = av->av_dgid.gid_guid;
2206 } else {
2207 path->rgid_l = 0x2;
2208 }
2209
2210 TAVOR_TNF_EXIT(tavor_set_addr_path);
2211 return (DDI_SUCCESS);
2212 }
2213
2214
2215 /*
2216 * tavor_get_addr_path()
2217 * Context: Can be called from interrupt or base context.
2218 *
2219 * Note: Just like tavor_set_addr_path() above, this routine is used for two
2220 * purposes. It is used to read in the Tavor UDAV fields, and it is used to
2221 * read in the address path information for QPs. Because the two Tavor
2222 * structures are similar, common fields can be read in here. But because
2223 * they are slightly different, we pass an additional flag to indicate which
2224 * type is being read.
2225 */
2226 void
tavor_get_addr_path(tavor_state_t * state,tavor_hw_addr_path_t * path,ibt_adds_vect_t * av,uint_t type,tavor_qphdl_t qp)2227 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path,
2228 ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp)
2229 {
2230 uint_t gidtbl_sz;
2231
2232 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2233 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2234
2235 av->av_src_path = path->ml_path;
2236 av->av_port_num = path->portnum;
2237 av->av_dlid = path->rlid;
2238 av->av_srvl = path->sl;
2239
2240 /*
2241 * Set "av_ipd" value from max_stat_rate.
2242 */
2243 if (qp) {
2244 /*
2245 * If a QP operation use the soft copy
2246 */
2247 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2248 av->av_srate = qp->qp_save_srate;
2249 } else {
2250 /*
2251 * The stat_rate_sup is used to decide how the srate value is
2252 * set and
2253 * if it is zero, the driver uses the old interface.
2254 */
2255 if (state->ts_devlim.stat_rate_sup) {
2256 if (path->max_stat_rate == 0) {
2257 av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */
2258 } else if (path->max_stat_rate == 1) {
2259 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2260 } else if (path->max_stat_rate == 2) {
2261 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2262 } else if (path->max_stat_rate == 3) {
2263 av->av_srate = IBT_SRATE_5; /* 1xDDR rate */
2264 }
2265 } else {
2266 if (path->max_stat_rate == 0) {
2267 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2268 } else if (path->max_stat_rate == 1) {
2269 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2270 }
2271 }
2272 }
2273
2274 /*
2275 * Extract all "global" values regardless of the value in the GRH
2276 * flag. Because "av_send_grh" is set only if "grh" is set, software
2277 * knows to ignore the other "global" values as necessary. Note: SW
2278 * does it this way to enable these query operations to return exactly
2279 * the same params that were passed when the addr path was last written.
2280 */
2281 av->av_send_grh = path->grh;
2282 if (type == TAVOR_ADDRPATH_QP) {
2283 av->av_sgid_ix = path->mgid_index;
2284 } else {
2285 /*
2286 * For Tavor UDAV, the "mgid_index" field is the index into
2287 * a combined table (not a per-port table). So some extra
2288 * calculations are necessary.
2289 */
2290 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2291 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2292 gidtbl_sz);
2293 }
2294 av->av_flow = path->flow_label;
2295 av->av_tclass = path->tclass;
2296 av->av_hop = path->hop_limit;
2297 av->av_dgid.gid_prefix = path->rgid_h;
2298 av->av_dgid.gid_guid = path->rgid_l;
2299 }
2300
2301
2302 /*
2303 * tavor_portnum_is_valid()
2304 * Context: Can be called from interrupt or base context.
2305 */
2306 int
tavor_portnum_is_valid(tavor_state_t * state,uint_t portnum)2307 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum)
2308 {
2309 uint_t max_port;
2310
2311 max_port = state->ts_cfg_profile->cp_num_ports;
2312 if ((portnum <= max_port) && (portnum != 0)) {
2313 return (1);
2314 } else {
2315 return (0);
2316 }
2317 }
2318
2319
2320 /*
2321 * tavor_pkeyindex_is_valid()
2322 * Context: Can be called from interrupt or base context.
2323 */
2324 int
tavor_pkeyindex_is_valid(tavor_state_t * state,uint_t pkeyindx)2325 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx)
2326 {
2327 uint_t max_pkeyindx;
2328
2329 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl;
2330 if (pkeyindx < max_pkeyindx) {
2331 return (1);
2332 } else {
2333 return (0);
2334 }
2335 }
2336
2337
2338 /*
2339 * tavor_queue_alloc()
2340 * Context: Can be called from interrupt or base context.
2341 */
2342 int
tavor_queue_alloc(tavor_state_t * state,tavor_qalloc_info_t * qa_info,uint_t sleepflag)2343 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info,
2344 uint_t sleepflag)
2345 {
2346 ddi_dma_attr_t dma_attr;
2347 int (*callback)(caddr_t);
2348 uint64_t realsize, alloc_mask;
2349 uint_t dma_xfer_mode, type;
2350 int flag, status;
2351
2352 TAVOR_TNF_ENTER(tavor_queue_alloc);
2353
2354 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2355
2356 /* Set the callback flag appropriately */
2357 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP :
2358 DDI_DMA_DONTWAIT;
2359
2360 /*
2361 * Initialize many of the default DMA attributes. Then set additional
2362 * alignment restrictions as necessary for the queue memory. Also
2363 * respect the configured value for IOMMU bypass
2364 */
2365 tavor_dma_attr_init(&dma_attr);
2366 dma_attr.dma_attr_align = qa_info->qa_bind_align;
2367 type = state->ts_cfg_profile->cp_iommu_bypass;
2368 if (type == TAVOR_BINDMEM_BYPASS) {
2369 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2370 }
2371
2372 /* Allocate a DMA handle */
2373 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL,
2374 &qa_info->qa_dmahdl);
2375 if (status != DDI_SUCCESS) {
2376 TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, "");
2377 TAVOR_TNF_EXIT(tavor_queue_alloc);
2378 return (DDI_FAILURE);
2379 }
2380
2381 /*
2382 * Determine the amount of memory to allocate, depending on the values
2383 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying
2384 * to solve here is that allocating a DMA handle with IOMMU bypass
2385 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2386 * that are less than the page size. Since we may need stricter
2387 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in
2388 * Tavor QP work queue memory allocation), we use the following method
2389 * to calculate how much additional memory to request, and we enforce
2390 * our own alignment on the allocated result.
2391 */
2392 alloc_mask = qa_info->qa_alloc_align - 1;
2393 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2394 realsize = qa_info->qa_size;
2395 } else {
2396 realsize = qa_info->qa_size + alloc_mask;
2397 }
2398
2399 /*
2400 * If we are to allocate the queue from system memory, then use
2401 * ddi_dma_mem_alloc() to find the space. Otherwise, if we are to
2402 * allocate the queue from locally-attached DDR memory, then use the
2403 * vmem allocator to find the space. In either case, return a pointer
2404 * to the memory range allocated (including any necessary alignment
2405 * adjustments), the "real" memory pointer, the "real" size, and a
2406 * ddi_acc_handle_t to use when reading from/writing to the memory.
2407 */
2408 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2409
2410 /*
2411 * Determine whether to map STREAMING or CONSISTENT. This is
2412 * based on the value set in the configuration profile at
2413 * attach time.
2414 */
2415 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
2416
2417 /* Allocate system memory for the queue */
2418 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2419 &state->ts_reg_accattr, dma_xfer_mode, callback, NULL,
2420 (caddr_t *)&qa_info->qa_buf_real,
2421 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2422 if (status != DDI_SUCCESS) {
2423 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2424 TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail,
2425 TAVOR_TNF_ERROR, "");
2426 TAVOR_TNF_EXIT(tavor_queue_alloc);
2427 return (DDI_FAILURE);
2428 }
2429
2430 /*
2431 * Save temporary copy of the real pointer. (This may be
2432 * modified in the last step below).
2433 */
2434 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2435
2436 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2437
2438 /* Allocate userland mappable memory for the queue */
2439 flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP :
2440 DDI_UMEM_NOSLEEP;
2441 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2442 &qa_info->qa_umemcookie);
2443 if (qa_info->qa_buf_real == NULL) {
2444 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2445 TNF_PROBE_0(tavor_queue_alloc_umem_fail,
2446 TAVOR_TNF_ERROR, "");
2447 TAVOR_TNF_EXIT(tavor_queue_alloc);
2448 return (DDI_FAILURE);
2449 }
2450
2451 /*
2452 * Save temporary copy of the real pointer. (This may be
2453 * modified in the last step below).
2454 */
2455 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2456
2457 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2458
2459 /* Allocate DDR memory for the queue */
2460 flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP;
2461 qa_info->qa_buf_real = (uint32_t *)vmem_xalloc(
2462 state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0,
2463 NULL, NULL, flag);
2464 if (qa_info->qa_buf_real == NULL) {
2465 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2466 TNF_PROBE_0(tavor_queue_alloc_vmxa_fail,
2467 TAVOR_TNF_ERROR, "");
2468 TAVOR_TNF_EXIT(tavor_queue_alloc);
2469 return (DDI_FAILURE);
2470 }
2471
2472 /*
2473 * Since "qa_buf_real" will be a PCI address (the offset into
2474 * the DDR memory), we first need to do some calculations to
2475 * convert it to its kernel mapped address. (Note: This may
2476 * be modified again below, when any additional "alloc"
2477 * alignment constraint is applied).
2478 */
2479 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2480 state->ts_reg_ddr_baseaddr) + ((uintptr_t)
2481 qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr));
2482 qa_info->qa_buf_realsz = realsize;
2483 qa_info->qa_acchdl = state->ts_reg_ddrhdl;
2484 }
2485
2486 /*
2487 * The last step is to ensure that the final address ("qa_buf_aligned")
2488 * has the appropriate "alloc" alignment restriction applied to it
2489 * (if necessary).
2490 */
2491 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2492 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2493 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2494 }
2495
2496 TAVOR_TNF_EXIT(tavor_queue_alloc);
2497 return (DDI_SUCCESS);
2498 }
2499
2500
2501 /*
2502 * tavor_queue_free()
2503 * Context: Can be called from interrupt or base context.
2504 */
2505 void
tavor_queue_free(tavor_state_t * state,tavor_qalloc_info_t * qa_info)2506 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info)
2507 {
2508 TAVOR_TNF_ENTER(tavor_queue_free);
2509
2510 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2511
2512 /*
2513 * Depending on how (i.e. from where) we allocated the memory for
2514 * this queue, we choose the appropriate method for releasing the
2515 * resources.
2516 */
2517 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2518
2519 ddi_dma_mem_free(&qa_info->qa_acchdl);
2520
2521 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2522
2523 ddi_umem_free(qa_info->qa_umemcookie);
2524
2525 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2526
2527 vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real,
2528 qa_info->qa_buf_realsz);
2529 }
2530
2531 /* Always free the dma handle */
2532 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2533
2534 TAVOR_TNF_EXIT(tavor_queue_free);
2535 }
2536
2537
2538 /*
2539 * tavor_dmaattr_get()
2540 * Context: Can be called from interrupt or base context.
2541 */
2542 void
tavor_dma_attr_init(ddi_dma_attr_t * dma_attr)2543 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr)
2544 {
2545 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
2546
2547 dma_attr->dma_attr_version = DMA_ATTR_V0;
2548 dma_attr->dma_attr_addr_lo = 0;
2549 dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull;
2550 dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull;
2551 dma_attr->dma_attr_align = 1;
2552 dma_attr->dma_attr_burstsizes = 0x3FF;
2553 dma_attr->dma_attr_minxfer = 1;
2554 dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull;
2555 dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull;
2556 dma_attr->dma_attr_sgllen = 0x7FFFFFFF;
2557 dma_attr->dma_attr_granular = 1;
2558 dma_attr->dma_attr_flags = 0;
2559 }
2560