xref: /titanic_52/usr/src/uts/common/io/ib/adapters/tavor/tavor_misc.c (revision c7facc54c4abed9e554ff80225311e6b7048d3c9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_misc.c
29  *    Tavor Miscellaneous routines - Address Handle, Multicast, Protection
30  *    Domain, and port-related operations
31  *
32  *    Implements all the routines necessary for allocating, freeing, querying
33  *    and modifying Address Handles and Protection Domains.  Also implements
34  *    all the routines necessary for adding and removing Queue Pairs to/from
35  *    Multicast Groups.  Lastly, it implements the routines necessary for
36  *    port-related query and modify operations.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/modctl.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 
47 #include <sys/ib/adapters/tavor/tavor.h>
48 
49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav,
50     uint_t flag);
51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
52     tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found);
53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg,
54     tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp);
55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp);
56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp);
57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state,
58     uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg,
60     tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc);
61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
62     uint_t prev_indx, tavor_hw_mcg_t *mcg_entry);
63 static int tavor_mcg_entry_invalidate(tavor_state_t *state,
64     tavor_hw_mcg_t *mcg_entry, uint_t indx);
65 static int tavor_mgid_is_valid(ib_gid_t gid);
66 static int tavor_mlid_is_valid(ib_lid_t lid);
67 
68 
69 /*
70  * tavor_ah_alloc()
71  *    Context: Can be called only from user or kernel context.
72  */
73 int
74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd,
75     ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
76 {
77 	tavor_rsrc_t		*udav, *rsrc;
78 	tavor_hw_udav_t		udav_entry;
79 	tavor_ahhdl_t		ah;
80 	ibt_mr_attr_t		mr_attr;
81 	tavor_mr_options_t	op;
82 	tavor_mrhdl_t		mr;
83 	uint64_t		data;
84 	uint32_t		size;
85 	int			status, i, flag;
86 	char			*errormsg;
87 
88 	TAVOR_TNF_ENTER(tavor_ah_alloc);
89 
90 	/*
91 	 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
92 	 * indicate that we wish to allocate an "invalid" (i.e. empty)
93 	 * address handle XXX
94 	 */
95 
96 	/* Validate that specified port number is legal */
97 	if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
98 		/* Set "status" and "errormsg" and goto failure */
99 		TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
100 		goto ahalloc_fail;
101 	}
102 
103 	/*
104 	 * Allocate a UDAV entry.  This will be filled in with all the
105 	 * necessary parameters to define the Address Handle.  Unlike the
106 	 * other hardware resources no ownership transfer takes place as
107 	 * these UDAV entries are always owned by hardware.
108 	 */
109 	status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav);
110 	if (status != DDI_SUCCESS) {
111 		/* Set "status" and "errormsg" and goto failure */
112 		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV");
113 		goto ahalloc_fail;
114 	}
115 
116 	/*
117 	 * Allocate the software structure for tracking the address handle
118 	 * (i.e. the Tavor Address Handle struct).  If we fail here, we must
119 	 * undo the previous resource allocation.
120 	 */
121 	status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc);
122 	if (status != DDI_SUCCESS) {
123 		/* Set "status" and "errormsg" and goto failure */
124 		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler");
125 		goto ahalloc_fail1;
126 	}
127 	ah = (tavor_ahhdl_t)rsrc->tr_addr;
128 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
129 
130 	/* Increment the reference count on the protection domain (PD) */
131 	tavor_pd_refcnt_inc(pd);
132 
133 	/*
134 	 * Fill in the UDAV entry.  Note: We are only filling in a temporary
135 	 * copy here, which we will later copy into the actual entry in
136 	 * Tavor DDR memory.  This starts be zeroing out the temporary copy
137 	 * and then calling tavor_set_addr_path() to fill in the common
138 	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
139 	 */
140 	bzero(&udav_entry, sizeof (tavor_hw_udav_t));
141 	status = tavor_set_addr_path(state, attr_p,
142 	    (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
143 	if (status != DDI_SUCCESS) {
144 		tavor_pd_refcnt_dec(pd);
145 		tavor_rsrc_free(state, &rsrc);
146 		tavor_rsrc_free(state, &udav);
147 		/* Set "status" and "errormsg" and goto failure */
148 		TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path");
149 		goto ahalloc_fail;
150 	}
151 	udav_entry.pd	  = pd->pd_pdnum;
152 	udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1;
153 
154 	/*
155 	 * Register the memory for the UDAV.  The memory for the UDAV must
156 	 * be registered in the Tavor TPT tables.  This gives us the LKey
157 	 * that we will need when we later post a UD work request that
158 	 * uses this address handle.
159 	 * We might be able to pre-register all the memory for the UDAV XXX
160 	 */
161 	flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
162 	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr;
163 	mr_attr.mr_len	 = udav->tr_len;
164 	mr_attr.mr_as	 = NULL;
165 	mr_attr.mr_flags = flag;
166 	op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
167 	op.mro_bind_dmahdl = NULL;
168 	op.mro_bind_override_addr = 0;
169 	status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
170 	if (status != DDI_SUCCESS) {
171 		/* Set "status" and "errormsg" and goto failure */
172 		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
173 		goto ahalloc_fail2;
174 	}
175 
176 	/*
177 	 * Fill in the UDAV entry.  Here we copy all the information from
178 	 * the temporary UDAV into the DDR memory for the real UDAV entry.
179 	 * Note that we copy everything but the first 64-bit word.  This
180 	 * is where the PD number for the address handle resides.
181 	 * By filling everything except the PD and then writing the PD in
182 	 * a separate step below, we can ensure that the UDAV is not
183 	 * accessed while there are partially written values in it (something
184 	 * which really should not happen anyway).  This is guaranteed
185 	 * because we take measures to ensure that the PD number is zero for
186 	 * all unused UDAV (and because PD#0 is reserved for Tavor).
187 	 */
188 	size = sizeof (tavor_hw_udav_t) >> 3;
189 	for (i = 1; i < size; i++) {
190 		data = ((uint64_t *)&udav_entry)[i];
191 		ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
192 		    data);
193 	}
194 	data = ((uint64_t *)&udav_entry)[0];
195 	ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data);
196 
197 	/*
198 	 * Fill in the rest of the Tavor Address Handle struct.  Having
199 	 * successfully copied the UDAV into the hardware, we update the
200 	 * following fields for use in further operations on the AH.
201 	 *
202 	 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
203 	 * here because we may need to return it later to the IBTF (as a
204 	 * result of a subsequent query operation).  Unlike the other UDAV
205 	 * parameters, the value of "av_dgid.gid_guid" is not always preserved
206 	 * by being written to hardware.  The reason for this is described in
207 	 * tavor_set_addr_path().
208 	 */
209 	ah->ah_udavrsrcp = udav;
210 	ah->ah_rsrcp	 = rsrc;
211 	ah->ah_pdhdl	 = pd;
212 	ah->ah_mrhdl	 = mr;
213 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
214 	ah->ah_save_srate = attr_p->av_srate;
215 	*ahhdl = ah;
216 
217 	/* Determine if later ddi_dma_sync will be necessary */
218 	ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state);
219 
220 	/* Sync the UDAV for use by the hardware */
221 	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
222 
223 	TAVOR_TNF_EXIT(tavor_ah_alloc);
224 	return (DDI_SUCCESS);
225 
226 ahalloc_fail2:
227 	tavor_pd_refcnt_dec(pd);
228 	tavor_rsrc_free(state, &rsrc);
229 ahalloc_fail1:
230 	tavor_rsrc_free(state, &udav);
231 ahalloc_fail:
232 	TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "",
233 	    tnf_string, msg, errormsg);
234 	TAVOR_TNF_EXIT(tavor_ah_alloc);
235 	return (status);
236 }
237 
238 
239 /*
240  * tavor_ah_free()
241  *    Context: Can be called only from user or kernel context.
242  */
243 /* ARGSUSED */
244 int
245 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
246 {
247 	tavor_rsrc_t		*udav, *rsrc;
248 	tavor_pdhdl_t		pd;
249 	tavor_mrhdl_t		mr;
250 	tavor_ahhdl_t		ah;
251 	int			status;
252 
253 	TAVOR_TNF_ENTER(tavor_ah_free);
254 
255 	/*
256 	 * Pull all the necessary information from the Tavor Address Handle
257 	 * struct.  This is necessary here because the resource for the
258 	 * AH is going to be freed up as part of this operation.
259 	 */
260 	ah    = *ahhdl;
261 	mutex_enter(&ah->ah_lock);
262 	udav  = ah->ah_udavrsrcp;
263 	rsrc  = ah->ah_rsrcp;
264 	pd    = ah->ah_pdhdl;
265 	mr    = ah->ah_mrhdl;
266 	mutex_exit(&ah->ah_lock);
267 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
268 
269 	/*
270 	 * Deregister the memory for the UDAV.  If this fails for any reason,
271 	 * then it is an indication that something (either in HW or SW) has
272 	 * gone seriously wrong.  So we print a warning message and return
273 	 * failure.
274 	 */
275 	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
276 	    sleepflag);
277 	if (status != DDI_SUCCESS) {
278 		TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
279 		TAVOR_TNF_EXIT(tavor_ah_free);
280 		return (ibc_get_ci_failure(0));
281 	}
282 
283 	/*
284 	 * Write zero to the first 64-bit word in the UDAV entry.  As
285 	 * described above (in tavor_ah_alloc), the PD number is stored in
286 	 * the first 64-bits of each UDAV and setting this to zero is
287 	 * guaranteed to invalidate the entry.
288 	 */
289 	ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0);
290 
291 	/* Sync the UDAV for use by the hardware */
292 	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
293 
294 	/* Decrement the reference count on the protection domain (PD) */
295 	tavor_pd_refcnt_dec(pd);
296 
297 	/* Free the Tavor Address Handle structure */
298 	tavor_rsrc_free(state, &rsrc);
299 
300 	/* Free up the UDAV entry resource */
301 	tavor_rsrc_free(state, &udav);
302 
303 	/* Set the ahhdl pointer to NULL and return success */
304 	*ahhdl = NULL;
305 
306 	TAVOR_TNF_EXIT(tavor_ah_free);
307 	return (DDI_SUCCESS);
308 }
309 
310 
311 /*
312  * tavor_ah_query()
313  *    Context: Can be called from interrupt or base context.
314  */
315 /* ARGSUSED */
316 int
317 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd,
318     ibt_adds_vect_t *attr_p)
319 {
320 	tavor_hw_udav_t		udav_entry;
321 	tavor_rsrc_t		*udav;
322 	uint64_t		data;
323 	uint32_t		size;
324 	int			i;
325 
326 	TAVOR_TNF_ENTER(tavor_ah_query);
327 
328 	mutex_enter(&ah->ah_lock);
329 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
330 
331 	/*
332 	 * Pull all the necessary information from the Tavor Address Handle
333 	 * structure
334 	 */
335 	udav	= ah->ah_udavrsrcp;
336 	*pd	= ah->ah_pdhdl;
337 
338 	/*
339 	 * Copy the UDAV entry into the temporary copy.  Here we copy all
340 	 * the information from the UDAV entry in DDR memory into the
341 	 * temporary UDAV.  Note:  We don't need to sync the UDAV for
342 	 * reading by software because Tavor HW never modifies the entry.
343 	 */
344 	size = sizeof (tavor_hw_udav_t) >> 3;
345 	for (i = 0; i < size; i++) {
346 		data = ddi_get64(udav->tr_acchdl,
347 		    ((uint64_t *)udav->tr_addr + i));
348 		((uint64_t *)&udav_entry)[i] = data;
349 	}
350 
351 	/*
352 	 * Fill in "ibt_adds_vect_t".  We call tavor_get_addr_path() to fill
353 	 * the common portions that can be pulled from the UDAV we pass in.
354 	 *
355 	 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
356 	 * "ah_save_guid" field we have previously saved away.  The reason
357 	 * for this is described in tavor_ah_alloc() and tavor_ah_modify().
358 	 */
359 	tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry,
360 	    attr_p, TAVOR_ADDRPATH_UDAV, NULL);
361 
362 	attr_p->av_dgid.gid_guid = ah->ah_save_guid;
363 	attr_p->av_srate = ah->ah_save_srate;
364 
365 	mutex_exit(&ah->ah_lock);
366 	TAVOR_TNF_EXIT(tavor_ah_query);
367 	return (DDI_SUCCESS);
368 }
369 
370 
371 /*
372  * tavor_ah_modify()
373  *    Context: Can be called from interrupt or base context.
374  */
375 /* ARGSUSED */
376 int
377 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah,
378     ibt_adds_vect_t *attr_p)
379 {
380 	tavor_hw_udav_t		udav_entry;
381 	tavor_rsrc_t		*udav;
382 	uint64_t		data_new, data_old;
383 	uint32_t		udav_pd, size, portnum_new;
384 	int			i, status;
385 
386 	TAVOR_TNF_ENTER(tavor_ah_modify);
387 
388 	/* Validate that specified port number is legal */
389 	if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
390 		TNF_PROBE_1(tavor_ah_modify_inv_portnum,
391 		    TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num);
392 		TAVOR_TNF_EXIT(tavor_ah_modify);
393 		return (IBT_HCA_PORT_INVALID);
394 	}
395 
396 	mutex_enter(&ah->ah_lock);
397 
398 	/*
399 	 * Pull all the necessary information from the Tavor Address Handle
400 	 * structure
401 	 */
402 	udav = ah->ah_udavrsrcp;
403 
404 	/*
405 	 * Fill in the UDAV entry.  Note: we are only filling in a temporary
406 	 * copy here, which we will later copy into the actual entry in
407 	 * Tavor DDR memory.  This starts be zeroing out the temporary copy
408 	 * and then calling tavor_set_addr_path() to fill in the common
409 	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
410 	 *
411 	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
412 	 * field here (just as we did during tavor_ah_alloc()) because we
413 	 * may need to return it later to the IBTF (as a result of a
414 	 * subsequent query operation).  As explained in tavor_ah_alloc(),
415 	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
416 	 * is not always preserved by being written to hardware.  The reason
417 	 * for this is described in tavor_set_addr_path().
418 	 */
419 	bzero(&udav_entry, sizeof (tavor_hw_udav_t));
420 	status = tavor_set_addr_path(state, attr_p,
421 	    (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
422 	if (status != DDI_SUCCESS) {
423 		mutex_exit(&ah->ah_lock);
424 		TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail,
425 		    TAVOR_TNF_ERROR, "");
426 		TAVOR_TNF_EXIT(tavor_ah_modify);
427 		return (status);
428 	}
429 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
430 	ah->ah_save_srate = attr_p->av_srate;
431 
432 	/*
433 	 * Save away the current PD number for this UDAV.  Then temporarily
434 	 * invalidate the entry (by setting the PD to zero).  Note:  Since
435 	 * the first 32 bits of the UDAV actually contain the current port
436 	 * number _and_ current PD number, we need to mask off some bits.
437 	 */
438 	udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr);
439 	udav_pd = udav_pd & 0xFFFFFF;
440 	ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0);
441 
442 	/* Sync the UDAV for use by the hardware */
443 	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
444 
445 	/*
446 	 * Copy UDAV structure to the entry
447 	 *    Note:  We copy in 64-bit chunks.  For the first two of these
448 	 *    chunks it is necessary to read the current contents of the
449 	 *    UDAV, mask off the modifiable portions (maintaining any
450 	 *    of the "reserved" portions), and then mask on the new data.
451 	 */
452 	size = sizeof (tavor_hw_udav_t) >> 3;
453 	for (i = 0; i < size; i++) {
454 		data_new = ((uint64_t *)&udav_entry)[i];
455 		data_old = ddi_get64(udav->tr_acchdl,
456 		    ((uint64_t *)udav->tr_addr + i));
457 
458 		/*
459 		 * Apply mask to change only the relevant values.  Note: We
460 		 * extract the new portnum from the address handle here
461 		 * because the "PD" and "portnum" fields are in the same
462 		 * 32-bit word in the UDAV.  We will use the (new) port
463 		 * number extracted here when we write the valid PD number
464 		 * in the last step below.
465 		 */
466 		if (i == 0) {
467 			data_old = data_old & TAVOR_UDAV_MODIFY_MASK0;
468 			portnum_new = data_new >> 56;
469 		} else if (i == 1) {
470 			data_old = data_old & TAVOR_UDAV_MODIFY_MASK1;
471 		} else {
472 			data_old = 0;
473 		}
474 
475 		/* Write the updated values to the UDAV (in DDR) */
476 		data_new = data_old | data_new;
477 		ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
478 		    data_new);
479 	}
480 
481 	/*
482 	 * Sync the body of the UDAV for use by the hardware.  After we
483 	 * have updated the PD number (to make the UDAV valid), we sync
484 	 * again to push the entire entry out for hardware access.
485 	 */
486 	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
487 
488 	/*
489 	 * Put the valid PD number back into UDAV entry.  Note: Because port
490 	 * number and PD number are in the same word, we must mask the
491 	 * new port number with the old PD number before writing it back
492 	 * to the UDAV entry
493 	 */
494 	udav_pd = ((portnum_new << 24) | udav_pd);
495 	ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd);
496 
497 	/* Sync the rest of the UDAV for use by the hardware */
498 	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
499 
500 	mutex_exit(&ah->ah_lock);
501 	TAVOR_TNF_EXIT(tavor_ah_modify);
502 	return (DDI_SUCCESS);
503 }
504 
505 
506 /*
507  * tavor_udav_sync()
508  *    Context: Can be called from interrupt or base context.
509  */
510 /* ARGSUSED */
511 static void
512 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag)
513 {
514 	ddi_dma_handle_t	dmahdl;
515 	off_t			offset;
516 	int			status;
517 
518 	TAVOR_TNF_ENTER(tavor_udav_sync);
519 
520 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
521 
522 	/* Determine if AH needs to be synced or not */
523 	if (ah->ah_sync == 0) {
524 		TAVOR_TNF_EXIT(tavor_udav_sync);
525 		return;
526 	}
527 
528 	/* Get the DMA handle from AH handle */
529 	dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl;
530 
531 	/* Calculate offset into address handle */
532 	offset = (off_t)0;
533 	status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag);
534 	if (status != DDI_SUCCESS) {
535 		TNF_PROBE_0(tavor_udav_sync_getnextentry_fail,
536 		    TAVOR_TNF_ERROR, "");
537 		TAVOR_TNF_EXIT(tavor_udav_sync);
538 		return;
539 	}
540 
541 	TAVOR_TNF_EXIT(tavor_udav_sync);
542 }
543 
544 
545 /*
546  * tavor_mcg_attach()
547  *    Context: Can be called only from user or kernel context.
548  */
549 int
550 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
551     ib_lid_t lid)
552 {
553 	tavor_rsrc_t		*rsrc;
554 	tavor_hw_mcg_t		*mcg_entry;
555 	tavor_hw_mcg_qp_list_t	*mcg_entry_qplist;
556 	tavor_mcghdl_t		mcg, newmcg;
557 	uint64_t		mgid_hash;
558 	uint32_t		end_indx;
559 	int			status;
560 	uint_t			qp_found;
561 	char			*errormsg;
562 
563 	TAVOR_TNF_ENTER(tavor_mcg_attach);
564 
565 	/*
566 	 * It is only allowed to attach MCG to UD queue pairs.  Verify
567 	 * that the intended QP is of the appropriate transport type
568 	 */
569 	if (qp->qp_serv_type != TAVOR_QP_UD) {
570 		/* Set "status" and "errormsg" and goto failure */
571 		TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type");
572 		goto mcgattach_fail;
573 	}
574 
575 	/*
576 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
577 	 * LIDs should be within a well defined range.  If the specified LID
578 	 * is outside of that range, then return an error.
579 	 */
580 	if (tavor_mlid_is_valid(lid) == 0) {
581 		/* Set "status" and "errormsg" and goto failure */
582 		TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID");
583 		goto mcgattach_fail;
584 	}
585 	/*
586 	 * Check for invalid Multicast GID.  All Multicast GIDs should have
587 	 * a well-defined pattern of bits and flags that are allowable.  If
588 	 * the specified GID does not meet the criteria, then return an error.
589 	 */
590 	if (tavor_mgid_is_valid(gid) == 0) {
591 		/* Set "status" and "errormsg" and goto failure */
592 		TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID");
593 		goto mcgattach_fail;
594 	}
595 
596 	/*
597 	 * Compute the MGID hash value.  Since the MCG table is arranged as
598 	 * a number of separate hash chains, this operation converts the
599 	 * specified MGID into the starting index of an entry in the hash
600 	 * table (i.e. the index for the start of the appropriate hash chain).
601 	 * Subsequent operations below will walk the chain searching for the
602 	 * right place to add this new QP.
603 	 */
604 	status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
605 	    &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
606 	if (status != TAVOR_CMD_SUCCESS) {
607 		cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
608 		    status);
609 		TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail,
610 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
611 		TAVOR_TNF_EXIT(tavor_mcg_attach);
612 		return (ibc_get_ci_failure(0));
613 	}
614 
615 	/*
616 	 * Grab the multicast group mutex.  Then grab the pre-allocated
617 	 * temporary buffer used for holding and/or modifying MCG entries.
618 	 * Zero out the temporary MCG entry before we begin.
619 	 */
620 	mutex_enter(&state->ts_mcglock);
621 	mcg_entry = state->ts_mcgtmp;
622 	mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
623 	bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
624 
625 	/*
626 	 * Walk through the array of MCG entries starting at "mgid_hash".
627 	 * Try to find the appropriate place for this new QP to be added.
628 	 * This could happen when the first entry of the chain has MGID == 0
629 	 * (which means that the hash chain is empty), or because we find
630 	 * an entry with the same MGID (in which case we'll add the QP to
631 	 * that MCG), or because we come to the end of the chain (in which
632 	 * case this is the first QP being added to the multicast group that
633 	 * corresponds to the MGID.  The tavor_mcg_walk_mgid_hash() routine
634 	 * walks the list and returns an index into the MCG table.  The entry
635 	 * at this index is then checked to determine which case we have
636 	 * fallen into (see below).  Note:  We are using the "shadow" MCG
637 	 * list (of tavor_mcg_t structs) for this lookup because the real
638 	 * MCG entries are in hardware (and the lookup process would be much
639 	 * more time consuming).
640 	 */
641 	end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
642 	mcg	 = &state->ts_mcghdl[end_indx];
643 
644 	/*
645 	 * If MGID == 0, then the hash chain is empty.  Just fill in the
646 	 * current entry.  Note:  No need to allocate an MCG table entry
647 	 * as all the hash chain "heads" are already preallocated.
648 	 */
649 	if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
650 
651 		/* Fill in the current entry in the "shadow" MCG list */
652 		tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
653 
654 		/*
655 		 * Try to add the new QP number to the list.  This (and the
656 		 * above) routine fills in a temporary MCG.  The "mcg_entry"
657 		 * and "mcg_entry_qplist" pointers simply point to different
658 		 * offsets within the same temporary copy of the MCG (for
659 		 * convenience).  Note:  If this fails, we need to invalidate
660 		 * the entries we've already put into the "shadow" list entry
661 		 * above.
662 		 */
663 		status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
664 		    &qp_found);
665 		if (status != DDI_SUCCESS) {
666 			bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
667 			mutex_exit(&state->ts_mcglock);
668 			/* Set "status" and "errormsg" and goto failure */
669 			TAVOR_TNF_FAIL(status, "failed qplist add");
670 			goto mcgattach_fail;
671 		}
672 
673 		/*
674 		 * Once the temporary MCG has been filled in, write the entry
675 		 * into the appropriate location in the Tavor MCG entry table.
676 		 * If it's successful, then drop the lock and return success.
677 		 * Note: In general, this operation shouldn't fail.  If it
678 		 * does, then it is an indication that something (probably in
679 		 * HW, but maybe in SW) has gone seriously wrong.  We still
680 		 * want to zero out the entries that we've filled in above
681 		 * (in the tavor_mcg_setup_new_hdr() routine).
682 		 */
683 		status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
684 		    TAVOR_CMD_NOSLEEP_SPIN);
685 		if (status != TAVOR_CMD_SUCCESS) {
686 			bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
687 			mutex_exit(&state->ts_mcglock);
688 			TAVOR_WARNING(state, "failed to write MCG entry");
689 			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
690 			    "%08x\n", status);
691 			TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
692 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
693 			    tnf_uint, indx, end_indx);
694 			TAVOR_TNF_EXIT(tavor_mcg_attach);
695 			return (ibc_get_ci_failure(0));
696 		}
697 
698 		/*
699 		 * Now that we know all the Tavor firmware accesses have been
700 		 * successful, we update the "shadow" MCG entry by incrementing
701 		 * the "number of attached QPs" count.
702 		 *
703 		 * We increment only if the QP is not already part of the
704 		 * MCG by checking the 'qp_found' flag returned from the
705 		 * qplist_add above.
706 		 */
707 		if (!qp_found) {
708 			mcg->mcg_num_qps++;
709 
710 			/*
711 			 * Increment the refcnt for this QP.  Because the QP
712 			 * was added to this MCG, the refcnt must be
713 			 * incremented.
714 			 */
715 			tavor_qp_mcg_refcnt_inc(qp);
716 		}
717 
718 		/*
719 		 * We drop the lock and return success.
720 		 */
721 		mutex_exit(&state->ts_mcglock);
722 		TAVOR_TNF_EXIT(tavor_mcg_attach);
723 		return (DDI_SUCCESS);
724 	}
725 
726 	/*
727 	 * If the specified MGID matches the MGID in the current entry, then
728 	 * we need to try to add the QP to the current MCG entry.  In this
729 	 * case, it means that we need to read the existing MCG entry (into
730 	 * the temporary MCG), add the new QP number to the temporary entry
731 	 * (using the same method we used above), and write the entry back
732 	 * to the hardware (same as above).
733 	 */
734 	if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
735 	    (mcg->mcg_mgid_l == gid.gid_guid)) {
736 
737 		/*
738 		 * Read the current MCG entry into the temporary MCG.  Note:
739 		 * In general, this operation shouldn't fail.  If it does,
740 		 * then it is an indication that something (probably in HW,
741 		 * but maybe in SW) has gone seriously wrong.
742 		 */
743 		status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
744 		    TAVOR_CMD_NOSLEEP_SPIN);
745 		if (status != TAVOR_CMD_SUCCESS) {
746 			mutex_exit(&state->ts_mcglock);
747 			TAVOR_WARNING(state, "failed to read MCG entry");
748 			cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
749 			    "%08x\n", status);
750 			TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
751 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
752 			    tnf_uint, indx, end_indx);
753 			TAVOR_TNF_EXIT(tavor_mcg_attach);
754 			return (ibc_get_ci_failure(0));
755 		}
756 
757 		/*
758 		 * Try to add the new QP number to the list.  This routine
759 		 * fills in the necessary pieces of the temporary MCG.  The
760 		 * "mcg_entry_qplist" pointer is used to point to the portion
761 		 * of the temporary MCG that holds the QP numbers.
762 		 *
763 		 * Note: tavor_mcg_qplist_add() returns SUCCESS if it
764 		 * already found the QP in the list.  In this case, the QP is
765 		 * not added on to the list again.  Check the flag 'qp_found'
766 		 * if this value is needed to be known.
767 		 *
768 		 */
769 		status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
770 		    &qp_found);
771 		if (status != DDI_SUCCESS) {
772 			mutex_exit(&state->ts_mcglock);
773 			/* Set "status" and "errormsg" and goto failure */
774 			TAVOR_TNF_FAIL(status, "failed qplist add");
775 			goto mcgattach_fail;
776 		}
777 
778 		/*
779 		 * Once the temporary MCG has been updated, write the entry
780 		 * into the appropriate location in the Tavor MCG entry table.
781 		 * If it's successful, then drop the lock and return success.
782 		 * Note: In general, this operation shouldn't fail.  If it
783 		 * does, then it is an indication that something (probably in
784 		 * HW, but maybe in SW) has gone seriously wrong.
785 		 */
786 		status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
787 		    TAVOR_CMD_NOSLEEP_SPIN);
788 		if (status != TAVOR_CMD_SUCCESS) {
789 			mutex_exit(&state->ts_mcglock);
790 			TAVOR_WARNING(state, "failed to write MCG entry");
791 			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
792 			    "%08x\n", status);
793 			TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
794 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
795 			    tnf_uint, indx, end_indx);
796 			TAVOR_TNF_EXIT(tavor_mcg_attach);
797 			return (ibc_get_ci_failure(0));
798 		}
799 
800 		/*
801 		 * Now that we know all the Tavor firmware accesses have been
802 		 * successful, we update the current "shadow" MCG entry by
803 		 * incrementing the "number of attached QPs" count.
804 		 *
805 		 * We increment only if the QP is not already part of the
806 		 * MCG by checking the 'qp_found' flag returned from the
807 		 * qplist_add above.
808 		 */
809 		if (!qp_found) {
810 			mcg->mcg_num_qps++;
811 
812 			/*
813 			 * Increment the refcnt for this QP.  Because the QP
814 			 * was added to this MCG, the refcnt must be
815 			 * incremented.
816 			 */
817 			tavor_qp_mcg_refcnt_inc(qp);
818 		}
819 
820 		/*
821 		 * We drop the lock and return success.
822 		 */
823 		mutex_exit(&state->ts_mcglock);
824 		TAVOR_TNF_EXIT(tavor_mcg_attach);
825 		return (DDI_SUCCESS);
826 	}
827 
828 	/*
829 	 * If we've reached here, then we're at the end of the hash chain.
830 	 * We need to allocate a new MCG entry, fill it in, write it to Tavor,
831 	 * and update the previous entry to link the new one to the end of the
832 	 * chain.
833 	 */
834 
835 	/*
836 	 * Allocate an MCG table entry.  This will be filled in with all
837 	 * the necessary parameters to define the multicast group.  Then it
838 	 * will be written to the hardware in the next-to-last step below.
839 	 */
840 	status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc);
841 	if (status != DDI_SUCCESS) {
842 		mutex_exit(&state->ts_mcglock);
843 		/* Set "status" and "errormsg" and goto failure */
844 		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG");
845 		goto mcgattach_fail;
846 	}
847 
848 	/*
849 	 * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
850 	 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion
851 	 * of the temporary MCG entry (the rest of which will be filled in by
852 	 * tavor_mcg_qplist_add() below)
853 	 */
854 	newmcg = &state->ts_mcghdl[rsrc->tr_indx];
855 	tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
856 
857 	/*
858 	 * Try to add the new QP number to the list.  This routine fills in
859 	 * the final necessary pieces of the temporary MCG.  The
860 	 * "mcg_entry_qplist" pointer is used to point to the portion of the
861 	 * temporary MCG that holds the QP numbers.  If we fail here, we
862 	 * must undo the previous resource allocation.
863 	 *
864 	 * Note: tavor_mcg_qplist_add() can we return SUCCESS if it already
865 	 * found the QP in the list.  In this case, the QP is not added on to
866 	 * the list again.  Check the flag 'qp_found' if this value is needed
867 	 * to be known.
868 	 */
869 	status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
870 	    &qp_found);
871 	if (status != DDI_SUCCESS) {
872 		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
873 		tavor_rsrc_free(state, &rsrc);
874 		mutex_exit(&state->ts_mcglock);
875 		/* Set "status" and "errormsg" and goto failure */
876 		TAVOR_TNF_FAIL(status, "failed qplist add");
877 		goto mcgattach_fail;
878 	}
879 
880 	/*
881 	 * Once the temporary MCG has been updated, write the entry into the
882 	 * appropriate location in the Tavor MCG entry table.  If this is
883 	 * successful, then we need to chain the previous entry to this one.
884 	 * Note: In general, this operation shouldn't fail.  If it does, then
885 	 * it is an indication that something (probably in HW, but maybe in
886 	 * SW) has gone seriously wrong.
887 	 */
888 	status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx,
889 	    TAVOR_CMD_NOSLEEP_SPIN);
890 	if (status != TAVOR_CMD_SUCCESS) {
891 		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
892 		tavor_rsrc_free(state, &rsrc);
893 		mutex_exit(&state->ts_mcglock);
894 		TAVOR_WARNING(state, "failed to write MCG entry");
895 		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
896 		    status);
897 		TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
898 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
899 		    tnf_uint, indx, rsrc->tr_indx);
900 		TAVOR_TNF_EXIT(tavor_mcg_attach);
901 		return (ibc_get_ci_failure(0));
902 	}
903 
904 	/*
905 	 * Now read the current MCG entry (the one previously at the end of
906 	 * hash chain) into the temporary MCG.  We are going to update its
907 	 * "next_gid_indx" now and write the entry back to the MCG table.
908 	 * Note:  In general, this operation shouldn't fail.  If it does, then
909 	 * it is an indication that something (probably in HW, but maybe in SW)
910 	 * has gone seriously wrong.  We will free up the MCG entry resource,
911 	 * but we will not undo the previously written MCG entry in the HW.
912 	 * This is OK, though, because the MCG entry is not currently attached
913 	 * to any hash chain.
914 	 */
915 	status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
916 	    TAVOR_CMD_NOSLEEP_SPIN);
917 	if (status != TAVOR_CMD_SUCCESS) {
918 		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
919 		tavor_rsrc_free(state, &rsrc);
920 		mutex_exit(&state->ts_mcglock);
921 		TAVOR_WARNING(state, "failed to read MCG entry");
922 		cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
923 		    status);
924 		TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
925 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
926 		    tnf_uint, indx, end_indx);
927 		TAVOR_TNF_EXIT(tavor_mcg_attach);
928 		return (ibc_get_ci_failure(0));
929 	}
930 
931 	/*
932 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
933 	 * and attempt to write the entry back into the Tavor MCG table.  If
934 	 * this succeeds, then we update the "shadow" list to reflect the
935 	 * change, drop the lock, and return success.  Note:  In general, this
936 	 * operation shouldn't fail.  If it does, then it is an indication
937 	 * that something (probably in HW, but maybe in SW) has gone seriously
938 	 * wrong.  Just as we do above, we will free up the MCG entry resource,
939 	 * but we will not try to undo the previously written MCG entry.  This
940 	 * is OK, though, because (since we failed here to update the end of
941 	 * the chain) that other entry is not currently attached to any chain.
942 	 */
943 	mcg_entry->next_gid_indx = rsrc->tr_indx;
944 	status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
945 	    TAVOR_CMD_NOSLEEP_SPIN);
946 	if (status != TAVOR_CMD_SUCCESS) {
947 		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
948 		tavor_rsrc_free(state, &rsrc);
949 		mutex_exit(&state->ts_mcglock);
950 		TAVOR_WARNING(state, "failed to write MCG entry");
951 		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
952 		    status);
953 		TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
954 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
955 		    tnf_uint, indx, end_indx);
956 		TAVOR_TNF_EXIT(tavor_mcg_attach);
957 		return (ibc_get_ci_failure(0));
958 	}
959 	mcg = &state->ts_mcghdl[end_indx];
960 	mcg->mcg_next_indx = rsrc->tr_indx;
961 
962 	/*
963 	 * Now that we know all the Tavor firmware accesses have been
964 	 * successful, we update the new "shadow" MCG entry by incrementing
965 	 * the "number of attached QPs" count.  Then we drop the lock and
966 	 * return success.
967 	 */
968 	newmcg->mcg_num_qps++;
969 
970 	/*
971 	 * Increment the refcnt for this QP.  Because the QP
972 	 * was added to this MCG, the refcnt must be
973 	 * incremented.
974 	 */
975 	tavor_qp_mcg_refcnt_inc(qp);
976 
977 	mutex_exit(&state->ts_mcglock);
978 	TAVOR_TNF_EXIT(tavor_mcg_attach);
979 	return (DDI_SUCCESS);
980 
981 mcgattach_fail:
982 	TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string,
983 	    msg, errormsg);
984 	TAVOR_TNF_EXIT(tavor_mcg_attach);
985 	return (status);
986 }
987 
988 
989 /*
990  * tavor_mcg_detach()
991  *    Context: Can be called only from user or kernel context.
992  */
993 int
994 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
995     ib_lid_t lid)
996 {
997 	tavor_hw_mcg_t		*mcg_entry;
998 	tavor_hw_mcg_qp_list_t	*mcg_entry_qplist;
999 	tavor_mcghdl_t		mcg;
1000 	uint64_t		mgid_hash;
1001 	uint32_t		end_indx, prev_indx;
1002 	int			status;
1003 
1004 	TAVOR_TNF_ENTER(tavor_mcg_detach);
1005 
1006 	/*
1007 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
1008 	 * LIDs should be within a well defined range.  If the specified LID
1009 	 * is outside of that range, then return an error.
1010 	 */
1011 	if (tavor_mlid_is_valid(lid) == 0) {
1012 		TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, "");
1013 		TAVOR_TNF_EXIT(tavor_mcg_detach);
1014 		return (IBT_MC_MLID_INVALID);
1015 	}
1016 
1017 	/*
1018 	 * Compute the MGID hash value.  As described above, the MCG table is
1019 	 * arranged as a number of separate hash chains.  This operation
1020 	 * converts the specified MGID into the starting index of an entry in
1021 	 * the hash table (i.e. the index for the start of the appropriate
1022 	 * hash chain).  Subsequent operations below will walk the chain
1023 	 * searching for a matching entry from which to attempt to remove
1024 	 * the specified QP.
1025 	 */
1026 	status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1027 	    &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
1028 	if (status != TAVOR_CMD_SUCCESS) {
1029 		cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
1030 		    status);
1031 		TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail,
1032 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1033 		TAVOR_TNF_EXIT(tavor_mcg_attach);
1034 		return (ibc_get_ci_failure(0));
1035 	}
1036 
1037 	/*
1038 	 * Grab the multicast group mutex.  Then grab the pre-allocated
1039 	 * temporary buffer used for holding and/or modifying MCG entries.
1040 	 */
1041 	mutex_enter(&state->ts_mcglock);
1042 	mcg_entry = state->ts_mcgtmp;
1043 	mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
1044 
1045 	/*
1046 	 * Walk through the array of MCG entries starting at "mgid_hash".
1047 	 * Try to find an MCG entry with a matching MGID.  The
1048 	 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an
1049 	 * index into the MCG table.  The entry at this index is checked to
1050 	 * determine whether it is a match or not.  If it is a match, then
1051 	 * we continue on to attempt to remove the QP from the MCG.  If it
1052 	 * is not a match (or not a valid MCG entry), then we return an error.
1053 	 */
1054 	end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1055 	mcg	 = &state->ts_mcghdl[end_indx];
1056 
1057 	/*
1058 	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1059 	 * does not match the MGID in the current entry, then return
1060 	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1061 	 * valid).
1062 	 */
1063 	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1064 	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1065 	    (mcg->mcg_mgid_l != gid.gid_guid))) {
1066 		mutex_exit(&state->ts_mcglock);
1067 		TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, "");
1068 		TAVOR_TNF_EXIT(tavor_mcg_detach);
1069 		return (IBT_MC_MGID_INVALID);
1070 	}
1071 
1072 	/*
1073 	 * Read the current MCG entry into the temporary MCG.  Note: In
1074 	 * general, this operation shouldn't fail.  If it does, then it is
1075 	 * an indication that something (probably in HW, but maybe in SW)
1076 	 * has gone seriously wrong.
1077 	 */
1078 	status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
1079 	    TAVOR_CMD_NOSLEEP_SPIN);
1080 	if (status != TAVOR_CMD_SUCCESS) {
1081 		mutex_exit(&state->ts_mcglock);
1082 		TAVOR_WARNING(state, "failed to read MCG entry");
1083 		cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1084 		    status);
1085 		TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail,
1086 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1087 		    tnf_uint, indx, end_indx);
1088 		TAVOR_TNF_EXIT(tavor_mcg_attach);
1089 		return (ibc_get_ci_failure(0));
1090 	}
1091 
1092 	/*
1093 	 * Search the QP number list for a match.  If a match is found, then
1094 	 * remove the entry from the QP list.  Otherwise, if no match is found,
1095 	 * return an error.
1096 	 */
1097 	status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1098 	if (status != DDI_SUCCESS) {
1099 		mutex_exit(&state->ts_mcglock);
1100 		TAVOR_TNF_EXIT(tavor_mcg_detach);
1101 		return (status);
1102 	}
1103 
1104 	/*
1105 	 * Decrement the MCG count for this QP.  When the 'qp_mcg'
1106 	 * field becomes 0, then this QP is no longer a member of any
1107 	 * MCG.
1108 	 */
1109 	tavor_qp_mcg_refcnt_dec(qp);
1110 
1111 	/*
1112 	 * If the current MCG's QP number list is about to be made empty
1113 	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1114 	 * chain.  Otherwise, just write the updated MCG entry back to the
1115 	 * hardware.  In either case, once we successfully update the hardware
1116 	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1117 	 * count (or zero out the entire "shadow" list entry) before returning
1118 	 * success.  Note:  Zeroing out the "shadow" list entry is done
1119 	 * inside of tavor_mcg_hash_list_remove().
1120 	 */
1121 	if (mcg->mcg_num_qps == 1) {
1122 
1123 		/* Remove an MCG entry from the hash chain */
1124 		status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx,
1125 		    mcg_entry);
1126 		if (status != DDI_SUCCESS) {
1127 			mutex_exit(&state->ts_mcglock);
1128 			TAVOR_TNF_EXIT(tavor_mcg_detach);
1129 			return (status);
1130 		}
1131 
1132 	} else {
1133 		/*
1134 		 * Write the updated MCG entry back to the Tavor MCG table.
1135 		 * If this succeeds, then we update the "shadow" list to
1136 		 * reflect the change (i.e. decrement the "mcg_num_qps"),
1137 		 * drop the lock, and return success.  Note:  In general,
1138 		 * this operation shouldn't fail.  If it does, then it is an
1139 		 * indication that something (probably in HW, but maybe in SW)
1140 		 * has gone seriously wrong.
1141 		 */
1142 		status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
1143 		    TAVOR_CMD_NOSLEEP_SPIN);
1144 		if (status != TAVOR_CMD_SUCCESS) {
1145 			mutex_exit(&state->ts_mcglock);
1146 			TAVOR_WARNING(state, "failed to write MCG entry");
1147 			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1148 			    "%08x\n", status);
1149 			TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail,
1150 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1151 			    tnf_uint, indx, end_indx);
1152 			TAVOR_TNF_EXIT(tavor_mcg_detach);
1153 			return (ibc_get_ci_failure(0));
1154 		}
1155 		mcg->mcg_num_qps--;
1156 	}
1157 
1158 	mutex_exit(&state->ts_mcglock);
1159 	TAVOR_TNF_EXIT(tavor_mcg_detach);
1160 	return (DDI_SUCCESS);
1161 }
1162 
1163 /*
1164  * tavor_qp_mcg_refcnt_inc()
1165  *    Context: Can be called from interrupt or base context.
1166  */
1167 static void
1168 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp)
1169 {
1170 	/* Increment the QP's MCG reference count */
1171 	mutex_enter(&qp->qp_lock);
1172 	qp->qp_mcg_refcnt++;
1173 	TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "",
1174 	    tnf_uint, refcnt, qp->qp_mcg_refcnt);
1175 	mutex_exit(&qp->qp_lock);
1176 }
1177 
1178 
1179 /*
1180  * tavor_qp_mcg_refcnt_dec()
1181  *    Context: Can be called from interrupt or base context.
1182  */
1183 static void
1184 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp)
1185 {
1186 	/* Decrement the QP's MCG reference count */
1187 	mutex_enter(&qp->qp_lock);
1188 	qp->qp_mcg_refcnt--;
1189 	TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "",
1190 	    tnf_uint, refcnt, qp->qp_mcg_refcnt);
1191 	mutex_exit(&qp->qp_lock);
1192 }
1193 
1194 
1195 /*
1196  * tavor_mcg_qplist_add()
1197  *    Context: Can be called from interrupt or base context.
1198  */
1199 static int
1200 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
1201     tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp,
1202     uint_t *qp_found)
1203 {
1204 	uint_t		qplist_indx;
1205 
1206 	TAVOR_TNF_ENTER(tavor_mcg_qplist_add);
1207 
1208 	ASSERT(MUTEX_HELD(&state->ts_mcglock));
1209 
1210 	qplist_indx = mcg->mcg_num_qps;
1211 
1212 	/*
1213 	 * Determine if we have exceeded the maximum number of QP per
1214 	 * multicast group.  If we have, then return an error
1215 	 */
1216 	if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) {
1217 		TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps,
1218 		    TAVOR_TNF_ERROR, "");
1219 		TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1220 		return (IBT_HCA_MCG_QP_EXCEEDED);
1221 	}
1222 
1223 	/*
1224 	 * Determine if the QP is already attached to this MCG table.  If it
1225 	 * is, then we break out and treat this operation as a NO-OP
1226 	 */
1227 	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1228 	    qplist_indx++) {
1229 		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1230 			break;
1231 		}
1232 	}
1233 
1234 	/*
1235 	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
1236 	 * return SUCCESS in this case, but the qplist will not have been
1237 	 * updated because the QP was already on the list.
1238 	 */
1239 	if (qplist_indx < mcg->mcg_num_qps) {
1240 		*qp_found = 1;
1241 	} else {
1242 		/*
1243 		 * Otherwise, append the new QP number to the end of the
1244 		 * current QP list.  Note: We will increment the "mcg_num_qps"
1245 		 * field on the "shadow" MCG list entry later (after we know
1246 		 * that all necessary Tavor firmware accesses have been
1247 		 * successful).
1248 		 *
1249 		 * Set 'qp_found' to 0 so we know the QP was added on to the
1250 		 * list for sure.
1251 		 */
1252 		mcg_qplist[qplist_indx].q   = TAVOR_MCG_QPN_VALID;
1253 		mcg_qplist[qplist_indx].qpn = qp->qp_qpnum;
1254 		*qp_found = 0;
1255 	}
1256 
1257 	TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1258 	return (DDI_SUCCESS);
1259 }
1260 
1261 
1262 
1263 /*
1264  * tavor_mcg_qplist_remove()
1265  *    Context: Can be called from interrupt or base context.
1266  */
1267 static int
1268 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist,
1269     tavor_qphdl_t qp)
1270 {
1271 	uint_t		i, qplist_indx;
1272 
1273 	TAVOR_TNF_ENTER(tavor_mcg_qplist_remove);
1274 
1275 	/*
1276 	 * Search the MCG QP list for a matching QPN.  When
1277 	 * it's found, we swap the last entry with the current
1278 	 * one, set the last entry to zero, decrement the last
1279 	 * entry, and return.  If it's not found, then it's
1280 	 * and error.
1281 	 */
1282 	qplist_indx = mcg->mcg_num_qps;
1283 	for (i = 0; i < qplist_indx; i++) {
1284 		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1285 			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1286 			mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID;
1287 			mcg_qplist[qplist_indx - 1].qpn = 0;
1288 
1289 			TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1290 			return (DDI_SUCCESS);
1291 		}
1292 	}
1293 
1294 	TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, "");
1295 	TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1296 	return (IBT_QP_HDL_INVALID);
1297 }
1298 
1299 
1300 /*
1301  * tavor_mcg_walk_mgid_hash()
1302  *    Context: Can be called from interrupt or base context.
1303  */
1304 static uint_t
1305 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx,
1306     ib_gid_t mgid, uint_t *p_indx)
1307 {
1308 	tavor_mcghdl_t	curr_mcghdl;
1309 	uint_t		curr_indx, prev_indx;
1310 
1311 	TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash);
1312 
1313 	ASSERT(MUTEX_HELD(&state->ts_mcglock));
1314 
1315 	/* Start at the head of the hash chain */
1316 	curr_indx   = start_indx;
1317 	prev_indx   = curr_indx;
1318 	curr_mcghdl = &state->ts_mcghdl[curr_indx];
1319 
1320 	/* If the first entry in the chain has MGID == 0, then stop */
1321 	if ((curr_mcghdl->mcg_mgid_h == 0) &&
1322 	    (curr_mcghdl->mcg_mgid_l == 0)) {
1323 		goto end_mgid_hash_walk;
1324 	}
1325 
1326 	/* If the first entry in the chain matches the MGID, then stop */
1327 	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1328 	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1329 		goto end_mgid_hash_walk;
1330 	}
1331 
1332 	/* Otherwise, walk the hash chain looking for a match */
1333 	while (curr_mcghdl->mcg_next_indx != 0) {
1334 		prev_indx = curr_indx;
1335 		curr_indx = curr_mcghdl->mcg_next_indx;
1336 		curr_mcghdl = &state->ts_mcghdl[curr_indx];
1337 
1338 		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1339 		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1340 			break;
1341 		}
1342 	}
1343 
1344 end_mgid_hash_walk:
1345 	/*
1346 	 * If necessary, return the index of the previous entry too.  This
1347 	 * is primarily used for detaching a QP from a multicast group.  It
1348 	 * may be necessary, in that case, to delete an MCG entry from the
1349 	 * hash chain and having the index of the previous entry is helpful.
1350 	 */
1351 	if (p_indx != NULL) {
1352 		*p_indx = prev_indx;
1353 	}
1354 	TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash);
1355 	return (curr_indx);
1356 }
1357 
1358 
1359 /*
1360  * tavor_mcg_setup_new_hdr()
1361  *    Context: Can be called from interrupt or base context.
1362  */
1363 static void
1364 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr,
1365     ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc)
1366 {
1367 	TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr);
1368 
1369 	/*
1370 	 * Fill in the fields of the "shadow" entry used by software
1371 	 * to track MCG hardware entry
1372 	 */
1373 	mcg->mcg_mgid_h	   = mgid.gid_prefix;
1374 	mcg->mcg_mgid_l	   = mgid.gid_guid;
1375 	mcg->mcg_rsrcp	   = mcg_rsrc;
1376 	mcg->mcg_next_indx = 0;
1377 	mcg->mcg_num_qps   = 0;
1378 
1379 	/*
1380 	 * Fill the header fields of the MCG entry (in the temporary copy)
1381 	 */
1382 	mcg_hdr->mgid_h		= mgid.gid_prefix;
1383 	mcg_hdr->mgid_l		= mgid.gid_guid;
1384 	mcg_hdr->next_gid_indx	= 0;
1385 
1386 	TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr);
1387 }
1388 
1389 
1390 /*
1391  * tavor_mcg_hash_list_remove()
1392  *    Context: Can be called only from user or kernel context.
1393  */
1394 static int
1395 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
1396     uint_t prev_indx, tavor_hw_mcg_t *mcg_entry)
1397 {
1398 	tavor_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
1399 	uint_t			next_indx;
1400 	int			status;
1401 
1402 	/* Get the pointer to "shadow" list for current entry */
1403 	curr_mcg = &state->ts_mcghdl[curr_indx];
1404 
1405 	/*
1406 	 * If this is the first entry on a hash chain, then attempt to replace
1407 	 * the entry with the next entry on the chain.  If there are no
1408 	 * subsequent entries on the chain, then this is the only entry and
1409 	 * should be invalidated.
1410 	 */
1411 	if (curr_indx == prev_indx) {
1412 
1413 		/*
1414 		 * If this is the only entry on the chain, then invalidate it.
1415 		 * Note:  Invalidating an MCG entry means writing all zeros
1416 		 * to the entry.  This is only necessary for those MCG
1417 		 * entries that are the "head" entries of the individual hash
1418 		 * chains.  Regardless of whether this operation returns
1419 		 * success or failure, return that result to the caller.
1420 		 */
1421 		next_indx = curr_mcg->mcg_next_indx;
1422 		if (next_indx == 0) {
1423 			status = tavor_mcg_entry_invalidate(state, mcg_entry,
1424 			    curr_indx);
1425 			bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1426 			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1427 			return (status);
1428 		}
1429 
1430 		/*
1431 		 * Otherwise, this is just the first entry on the chain, so
1432 		 * grab the next one
1433 		 */
1434 		next_mcg = &state->ts_mcghdl[next_indx];
1435 
1436 		/*
1437 		 * Read the next MCG entry into the temporary MCG.  Note:
1438 		 * In general, this operation shouldn't fail.  If it does,
1439 		 * then it is an indication that something (probably in HW,
1440 		 * but maybe in SW) has gone seriously wrong.
1441 		 */
1442 		status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx,
1443 		    TAVOR_CMD_NOSLEEP_SPIN);
1444 		if (status != TAVOR_CMD_SUCCESS) {
1445 			TAVOR_WARNING(state, "failed to read MCG entry");
1446 			cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
1447 			    "%08x\n", status);
1448 			TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1449 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1450 			    tnf_uint, indx, next_indx);
1451 			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1452 			return (ibc_get_ci_failure(0));
1453 		}
1454 
1455 		/*
1456 		 * Copy/Write the temporary MCG back to the hardware MCG list
1457 		 * using the current index.  This essentially removes the
1458 		 * current MCG entry from the list by writing over it with
1459 		 * the next one.  If this is successful, then we can do the
1460 		 * same operation for the "shadow" list.  And we can also
1461 		 * free up the Tavor MCG entry resource that was associated
1462 		 * with the (old) next entry.  Note:  In general, this
1463 		 * operation shouldn't fail.  If it does, then it is an
1464 		 * indication that something (probably in HW, but maybe in SW)
1465 		 * has gone seriously wrong.
1466 		 */
1467 		status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1468 		    TAVOR_CMD_NOSLEEP_SPIN);
1469 		if (status != TAVOR_CMD_SUCCESS) {
1470 			TAVOR_WARNING(state, "failed to write MCG entry");
1471 			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1472 			    "%08x\n", status);
1473 			TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1474 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1475 			    tnf_uint, indx, curr_indx);
1476 			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1477 			return (ibc_get_ci_failure(0));
1478 		}
1479 
1480 		/*
1481 		 * Copy all the software tracking information from the next
1482 		 * entry on the "shadow" MCG list into the current entry on
1483 		 * the list.  Then invalidate (zero out) the other "shadow"
1484 		 * list entry.
1485 		 */
1486 		bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1487 		bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s));
1488 
1489 		/*
1490 		 * Free up the Tavor MCG entry resource used by the "next"
1491 		 * MCG entry.  That resource is no longer needed by any
1492 		 * MCG entry which is first on a hash chain (like the "next"
1493 		 * entry has just become).
1494 		 */
1495 		tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1496 
1497 		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1498 		return (DDI_SUCCESS);
1499 	}
1500 
1501 	/*
1502 	 * Else if this is the last entry on the hash chain (or a middle
1503 	 * entry, then we update the previous entry's "next_gid_index" field
1504 	 * to make it point instead to the next entry on the chain.  By
1505 	 * skipping over the removed entry in this way, we can then free up
1506 	 * any resources associated with the current entry.  Note:  We don't
1507 	 * need to invalidate the "skipped over" hardware entry because it
1508 	 * will no be longer connected to any hash chains, and if/when it is
1509 	 * finally re-used, it will be written with entirely new values.
1510 	 */
1511 
1512 	/*
1513 	 * Read the next MCG entry into the temporary MCG.  Note:  In general,
1514 	 * this operation shouldn't fail.  If it does, then it is an
1515 	 * indication that something (probably in HW, but maybe in SW) has
1516 	 * gone seriously wrong.
1517 	 */
1518 	status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1519 	    TAVOR_CMD_NOSLEEP_SPIN);
1520 	if (status != TAVOR_CMD_SUCCESS) {
1521 		TAVOR_WARNING(state, "failed to read MCG entry");
1522 		cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1523 		    status);
1524 		TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1525 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1526 		    tnf_uint, indx, prev_indx);
1527 		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1528 		return (ibc_get_ci_failure(0));
1529 	}
1530 
1531 	/*
1532 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
1533 	 * and attempt to write the entry back into the Tavor MCG table.  If
1534 	 * this succeeds, then we update the "shadow" list to reflect the
1535 	 * change, free up the Tavor MCG entry resource that was associated
1536 	 * with the current entry, and return success.  Note:  In general,
1537 	 * this operation shouldn't fail.  If it does, then it is an indication
1538 	 * that something (probably in HW, but maybe in SW) has gone seriously
1539 	 * wrong.
1540 	 */
1541 	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1542 	status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1543 	    TAVOR_CMD_NOSLEEP_SPIN);
1544 	if (status != TAVOR_CMD_SUCCESS) {
1545 		TAVOR_WARNING(state, "failed to write MCG entry");
1546 		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1547 		    status);
1548 		TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1549 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1550 		    tnf_uint, indx, prev_indx);
1551 		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1552 		return (ibc_get_ci_failure(0));
1553 	}
1554 
1555 	/*
1556 	 * Get the pointer to the "shadow" MCG list entry for the previous
1557 	 * MCG.  Update its "mcg_next_indx" to point to the next entry
1558 	 * the one after the current entry. Note:  This next index may be
1559 	 * zero, indicating the end of the list.
1560 	 */
1561 	prev_mcg = &state->ts_mcghdl[prev_indx];
1562 	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1563 
1564 	/*
1565 	 * Free up the Tavor MCG entry resource used by the current entry.
1566 	 * This resource is no longer needed because the chain now skips over
1567 	 * the current entry.  Then invalidate (zero out) the current "shadow"
1568 	 * list entry.
1569 	 */
1570 	tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1571 	bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1572 
1573 	TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1574 	return (DDI_SUCCESS);
1575 }
1576 
1577 
1578 /*
1579  * tavor_mcg_entry_invalidate()
1580  *    Context: Can be called only from user or kernel context.
1581  */
1582 static int
1583 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry,
1584     uint_t indx)
1585 {
1586 	int		status;
1587 
1588 	TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate);
1589 
1590 	/*
1591 	 * Invalidate the hardware MCG entry by zeroing out this temporary
1592 	 * MCG and writing it the the hardware.  Note: In general, this
1593 	 * operation shouldn't fail.  If it does, then it is an indication
1594 	 * that something (probably in HW, but maybe in SW) has gone seriously
1595 	 * wrong.
1596 	 */
1597 	bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
1598 	status = tavor_write_mgm_cmd_post(state, mcg_entry, indx,
1599 	    TAVOR_CMD_NOSLEEP_SPIN);
1600 	if (status != TAVOR_CMD_SUCCESS) {
1601 		TAVOR_WARNING(state, "failed to write MCG entry");
1602 		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1603 		    status);
1604 		TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail,
1605 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1606 		    tnf_uint, indx, indx);
1607 		TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1608 		return (ibc_get_ci_failure(0));
1609 	}
1610 
1611 	TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1612 	return (DDI_SUCCESS);
1613 }
1614 
1615 
1616 /*
1617  * tavor_mgid_is_valid()
1618  *    Context: Can be called from interrupt or base context.
1619  */
1620 static int
1621 tavor_mgid_is_valid(ib_gid_t gid)
1622 {
1623 	uint_t		topbits, flags, scope;
1624 
1625 	TAVOR_TNF_ENTER(tavor_mgid_is_valid);
1626 
1627 	/*
1628 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1629 	 * "multicast GID" must have its top eight bits set to all ones
1630 	 */
1631 	topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) &
1632 	    TAVOR_MCG_TOPBITS_MASK;
1633 	if (topbits != TAVOR_MCG_TOPBITS) {
1634 		TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR,
1635 		    "");
1636 		TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1637 		return (0);
1638 	}
1639 
1640 	/*
1641 	 * The next 4 bits are the "flag" bits.  These are valid only
1642 	 * if they are "0" (which correspond to permanently assigned/
1643 	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1644 	 * multicast GIDs).  All other values are reserved.
1645 	 */
1646 	flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) &
1647 	    TAVOR_MCG_FLAGS_MASK;
1648 	if (!((flags == TAVOR_MCG_FLAGS_PERM) ||
1649 	    (flags == TAVOR_MCG_FLAGS_NONPERM))) {
1650 		TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR,
1651 		    "", tnf_uint, flags, flags);
1652 		TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1653 		return (0);
1654 	}
1655 
1656 	/*
1657 	 * The next 4 bits are the "scope" bits.  These are valid only
1658 	 * if they are "2" (Link-local), "5" (Site-local), "8"
1659 	 * (Organization-local) or "E" (Global).  All other values
1660 	 * are reserved (or currently unassigned).
1661 	 */
1662 	scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) &
1663 	    TAVOR_MCG_SCOPE_MASK;
1664 	if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) ||
1665 	    (scope == TAVOR_MCG_SCOPE_SITELOC)	 ||
1666 	    (scope == TAVOR_MCG_SCOPE_ORGLOC)	 ||
1667 	    (scope == TAVOR_MCG_SCOPE_GLOBAL))) {
1668 		TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR,
1669 		    "", tnf_uint, scope, scope);
1670 		TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1671 		return (0);
1672 	}
1673 
1674 	/*
1675 	 * If it passes all of the above checks, then we will consider it
1676 	 * a valid multicast GID.
1677 	 */
1678 	TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1679 	return (1);
1680 }
1681 
1682 
1683 /*
1684  * tavor_mlid_is_valid()
1685  *    Context: Can be called from interrupt or base context.
1686  */
1687 static int
1688 tavor_mlid_is_valid(ib_lid_t lid)
1689 {
1690 	TAVOR_TNF_ENTER(tavor_mlid_is_valid);
1691 
1692 	/*
1693 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1694 	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1695 	 */
1696 	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1697 		TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR,
1698 		    "", tnf_uint, mlid, lid);
1699 		TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1700 		return (0);
1701 	}
1702 
1703 	TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1704 	return (1);
1705 }
1706 
1707 
1708 /*
1709  * tavor_pd_alloc()
1710  *    Context: Can be called only from user or kernel context.
1711  */
1712 int
1713 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag)
1714 {
1715 	tavor_rsrc_t	*rsrc;
1716 	tavor_pdhdl_t	pd;
1717 	int		status;
1718 
1719 	TAVOR_TNF_ENTER(tavor_pd_alloc);
1720 
1721 	/*
1722 	 * Allocate the software structure for tracking the protection domain
1723 	 * (i.e. the Tavor Protection Domain handle).  By default each PD
1724 	 * structure will have a unique PD number assigned to it.  All that
1725 	 * is necessary is for software to initialize the PD reference count
1726 	 * (to zero) and return success.
1727 	 */
1728 	status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc);
1729 	if (status != DDI_SUCCESS) {
1730 		TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, "");
1731 		TAVOR_TNF_EXIT(tavor_pd_alloc);
1732 		return (IBT_INSUFF_RESOURCE);
1733 	}
1734 	pd = (tavor_pdhdl_t)rsrc->tr_addr;
1735 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1736 
1737 	pd->pd_refcnt = 0;
1738 	*pdhdl = pd;
1739 
1740 	TAVOR_TNF_EXIT(tavor_pd_alloc);
1741 	return (DDI_SUCCESS);
1742 }
1743 
1744 
1745 /*
1746  * tavor_pd_free()
1747  *    Context: Can be called only from user or kernel context.
1748  */
1749 int
1750 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl)
1751 {
1752 	tavor_rsrc_t	*rsrc;
1753 	tavor_pdhdl_t	pd;
1754 
1755 	TAVOR_TNF_ENTER(tavor_pd_free);
1756 
1757 	/*
1758 	 * Pull all the necessary information from the Tavor Protection Domain
1759 	 * handle.  This is necessary here because the resource for the
1760 	 * PD is going to be freed up as part of this operation.
1761 	 */
1762 	pd   = *pdhdl;
1763 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1764 	rsrc = pd->pd_rsrcp;
1765 
1766 	/*
1767 	 * Check the PD reference count.  If the reference count is non-zero,
1768 	 * then it means that this protection domain is still referenced by
1769 	 * some memory region, queue pair, address handle, or other IB object
1770 	 * If it is non-zero, then return an error.  Otherwise, free the
1771 	 * Tavor resource and return success.
1772 	 */
1773 	if (pd->pd_refcnt != 0) {
1774 		TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "",
1775 		    tnf_int, refcnt, pd->pd_refcnt);
1776 		TAVOR_TNF_EXIT(tavor_pd_free);
1777 		return (IBT_PD_IN_USE);
1778 	}
1779 
1780 	/* Free the Tavor Protection Domain handle */
1781 	tavor_rsrc_free(state, &rsrc);
1782 
1783 	/* Set the pdhdl pointer to NULL and return success */
1784 	*pdhdl = (tavor_pdhdl_t)NULL;
1785 
1786 	TAVOR_TNF_EXIT(tavor_pd_free);
1787 	return (DDI_SUCCESS);
1788 }
1789 
1790 
1791 /*
1792  * tavor_pd_refcnt_inc()
1793  *    Context: Can be called from interrupt or base context.
1794  */
1795 void
1796 tavor_pd_refcnt_inc(tavor_pdhdl_t pd)
1797 {
1798 	/* Increment the protection domain's reference count */
1799 	mutex_enter(&pd->pd_lock);
1800 	TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "",
1801 	    tnf_uint, refcnt, pd->pd_refcnt);
1802 	pd->pd_refcnt++;
1803 	mutex_exit(&pd->pd_lock);
1804 
1805 }
1806 
1807 
1808 /*
1809  * tavor_pd_refcnt_dec()
1810  *    Context: Can be called from interrupt or base context.
1811  */
1812 void
1813 tavor_pd_refcnt_dec(tavor_pdhdl_t pd)
1814 {
1815 	/* Decrement the protection domain's reference count */
1816 	mutex_enter(&pd->pd_lock);
1817 	pd->pd_refcnt--;
1818 	TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "",
1819 	    tnf_uint, refcnt, pd->pd_refcnt);
1820 	mutex_exit(&pd->pd_lock);
1821 
1822 }
1823 
1824 
1825 /*
1826  * tavor_port_query()
1827  *    Context: Can be called only from user or kernel context.
1828  */
1829 int
1830 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1831 {
1832 	sm_portinfo_t		portinfo;
1833 	sm_guidinfo_t		guidinfo;
1834 	sm_pkey_table_t		pkeytable;
1835 	ib_gid_t		*sgid;
1836 	uint_t			sgid_max, pkey_max, tbl_size;
1837 	int			i, j, indx, status;
1838 
1839 	TAVOR_TNF_ENTER(tavor_port_query);
1840 
1841 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1842 
1843 	/* Validate that specified port number is legal */
1844 	if (!tavor_portnum_is_valid(state, port)) {
1845 		TNF_PROBE_1(tavor_port_query_inv_portnum_fail,
1846 		    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
1847 		TAVOR_TNF_EXIT(tavor_port_query);
1848 		return (IBT_HCA_PORT_INVALID);
1849 	}
1850 
1851 	/*
1852 	 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD
1853 	 * to the firmware (for the specified port number).  This returns
1854 	 * a full PortInfo MAD (in "portinfo") which we subsequently
1855 	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1856 	 * to the IBTF.
1857 	 */
1858 	status = tavor_getportinfo_cmd_post(state, port,
1859 	    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1860 	if (status != TAVOR_CMD_SUCCESS) {
1861 		cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command "
1862 		    "failed: %08x\n", port, status);
1863 		TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail,
1864 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1865 		TAVOR_TNF_EXIT(tavor_port_query);
1866 		return (ibc_get_ci_failure(0));
1867 	}
1868 
1869 	/*
1870 	 * Parse the PortInfo MAD and fill in the IBTF structure
1871 	 */
1872 	pi->p_base_lid		= portinfo.LID;
1873 	pi->p_qkey_violations	= portinfo.Q_KeyViolations;
1874 	pi->p_pkey_violations	= portinfo.P_KeyViolations;
1875 	pi->p_sm_sl		= portinfo.MasterSMSL;
1876 	pi->p_sm_lid		= portinfo.MasterSMLID;
1877 	pi->p_linkstate		= portinfo.PortState;
1878 	pi->p_port_num		= portinfo.LocalPortNum;
1879 	pi->p_phys_state	= portinfo.PortPhysicalState;
1880 	pi->p_width_supported	= portinfo.LinkWidthSupported;
1881 	pi->p_width_enabled	= portinfo.LinkWidthEnabled;
1882 	pi->p_width_active	= portinfo.LinkWidthActive;
1883 	pi->p_speed_supported	= portinfo.LinkSpeedSupported;
1884 	pi->p_speed_enabled	= portinfo.LinkSpeedEnabled;
1885 	pi->p_speed_active	= portinfo.LinkSpeedActive;
1886 	pi->p_mtu		= portinfo.MTUCap;
1887 	pi->p_lmc		= portinfo.LMC;
1888 	pi->p_max_vl		= portinfo.VLCap;
1889 	pi->p_subnet_timeout	= portinfo.SubnetTimeOut;
1890 	pi->p_msg_sz		= ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ);
1891 	tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl;
1892 	pi->p_sgid_tbl_sz	= (1 << tbl_size);
1893 	tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl;
1894 	pi->p_pkey_tbl_sz	= (1 << tbl_size);
1895 
1896 	/*
1897 	 * Convert InfiniBand-defined port capability flags to the format
1898 	 * specified by the IBTF
1899 	 */
1900 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1901 		pi->p_capabilities |= IBT_PORT_CAP_SM;
1902 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1903 		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1904 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1905 		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1906 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1907 		pi->p_capabilities |= IBT_PORT_CAP_DM;
1908 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1909 		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1910 
1911 	/*
1912 	 * Fill in the SGID table.  Since the only access to the Tavor
1913 	 * GID tables is through the firmware's MAD_IFC interface, we
1914 	 * post as many GetGUIDInfo MADs as necessary to read in the entire
1915 	 * contents of the SGID table (for the specified port).  Note:  The
1916 	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
1917 	 * GUIDs are then appended to the GID prefix for the port (from the
1918 	 * GetPortInfo above) to form the entire SGID table.
1919 	 */
1920 	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1921 		status = tavor_getguidinfo_cmd_post(state, port, i >> 3,
1922 		    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1923 		if (status != TAVOR_CMD_SUCCESS) {
1924 			cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) "
1925 			    "command failed: %08x\n", port, status);
1926 			TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail,
1927 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1928 			TAVOR_TNF_EXIT(tavor_port_query);
1929 			return (ibc_get_ci_failure(0));
1930 		}
1931 
1932 		/* Figure out how many of the entries are valid */
1933 		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1934 		for (j = 0; j < sgid_max; j++) {
1935 			indx = (i + j);
1936 			sgid = &pi->p_sgid_tbl[indx];
1937 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
1938 			sgid->gid_prefix = portinfo.GidPrefix;
1939 			sgid->gid_guid	 = guidinfo.GUIDBlocks[j];
1940 		}
1941 	}
1942 
1943 	/*
1944 	 * Fill in the PKey table.  Just as for the GID tables above, the
1945 	 * only access to the Tavor PKey tables is through the firmware's
1946 	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
1947 	 * to read in the entire contents of the PKey table (for the specified
1948 	 * port).  Note:  The GetPKeyTable command only gets 32 PKeys per
1949 	 * operation.
1950 	 */
1951 	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
1952 		status = tavor_getpkeytable_cmd_post(state, port, i,
1953 		    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
1954 		if (status != TAVOR_CMD_SUCCESS) {
1955 			cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) "
1956 			    "command failed: %08x\n", port, status);
1957 			TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail,
1958 			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1959 			TAVOR_TNF_EXIT(tavor_port_query);
1960 			return (ibc_get_ci_failure(0));
1961 		}
1962 
1963 		/* Figure out how many of the entries are valid */
1964 		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
1965 		for (j = 0; j < pkey_max; j++) {
1966 			indx = (i + j);
1967 			pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j];
1968 		}
1969 	}
1970 
1971 	TAVOR_TNF_EXIT(tavor_port_query);
1972 	return (DDI_SUCCESS);
1973 }
1974 
1975 
1976 /*
1977  * tavor_port_modify()
1978  *    Context: Can be called only from user or kernel context.
1979  */
1980 /* ARGSUSED */
1981 int
1982 tavor_port_modify(tavor_state_t *state, uint8_t port,
1983     ibt_port_modify_flags_t flags, uint8_t init_type)
1984 {
1985 	sm_portinfo_t	portinfo;
1986 	uint32_t	capmask, reset_qkey;
1987 	int		status;
1988 
1989 	TAVOR_TNF_ENTER(tavor_port_modify);
1990 
1991 	/*
1992 	 * Return an error if either of the unsupported flags are set
1993 	 */
1994 	if ((flags & IBT_PORT_SHUTDOWN) ||
1995 	    (flags & IBT_PORT_SET_INIT_TYPE)) {
1996 		TNF_PROBE_1(tavor_port_modify_inv_flags_fail,
1997 		    TAVOR_TNF_ERROR, "", tnf_uint, flags, flags);
1998 		TAVOR_TNF_EXIT(tavor_port_modify);
1999 		return (IBT_NOT_SUPPORTED);
2000 	}
2001 
2002 	/*
2003 	 * Determine whether we are trying to reset the QKey counter
2004 	 */
2005 	reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0;
2006 
2007 	/* Validate that specified port number is legal */
2008 	if (!tavor_portnum_is_valid(state, port)) {
2009 		TNF_PROBE_1(tavor_port_modify_inv_portnum_fail,
2010 		    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
2011 		TAVOR_TNF_EXIT(tavor_port_modify);
2012 		return (IBT_HCA_PORT_INVALID);
2013 	}
2014 
2015 	/*
2016 	 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the
2017 	 * firmware (for the specified port number).  This returns a full
2018 	 * PortInfo MAD (in "portinfo") from which we pull the current
2019 	 * capability mask.  We then modify the capability mask as directed
2020 	 * by the "pmod_flags" field, and write the updated capability mask
2021 	 * using the Tavor SET_IB command (below).
2022 	 */
2023 	status = tavor_getportinfo_cmd_post(state, port,
2024 	    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2025 	if (status != TAVOR_CMD_SUCCESS) {
2026 		TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail,
2027 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2028 		TAVOR_TNF_EXIT(tavor_port_modify);
2029 		return (ibc_get_ci_failure(0));
2030 	}
2031 
2032 	/*
2033 	 * Convert InfiniBand-defined port capability flags to the format
2034 	 * specified by the IBTF.  Specifically, we modify the capability
2035 	 * mask based on the specified values.
2036 	 */
2037 	capmask = portinfo.CapabilityMask;
2038 
2039 	if (flags & IBT_PORT_RESET_SM)
2040 		capmask &= ~SM_CAP_MASK_IS_SM;
2041 	else if (flags & IBT_PORT_SET_SM)
2042 		capmask |= SM_CAP_MASK_IS_SM;
2043 
2044 	if (flags & IBT_PORT_RESET_SNMP)
2045 		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2046 	else if (flags & IBT_PORT_SET_SNMP)
2047 		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2048 
2049 	if (flags & IBT_PORT_RESET_DEVMGT)
2050 		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2051 	else if (flags & IBT_PORT_SET_DEVMGT)
2052 		capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2053 
2054 	if (flags & IBT_PORT_RESET_VENDOR)
2055 		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2056 	else if (flags & IBT_PORT_SET_VENDOR)
2057 		capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2058 
2059 	/*
2060 	 * Use the Tavor SET_IB command to update the capability mask and
2061 	 * (possibly) reset the QKey violation counter for the specified port.
2062 	 * Note: In general, this operation shouldn't fail.  If it does, then
2063 	 * it is an indication that something (probably in HW, but maybe in
2064 	 * SW) has gone seriously wrong.
2065 	 */
2066 	status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey,
2067 	    TAVOR_SLEEPFLAG_FOR_CONTEXT());
2068 	if (status != TAVOR_CMD_SUCCESS) {
2069 		TAVOR_WARNING(state, "failed to modify port capabilities");
2070 		cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: "
2071 		    "%08x\n", port, status);
2072 		TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail,
2073 		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2074 		TAVOR_TNF_EXIT(tavor_port_modify);
2075 		return (ibc_get_ci_failure(0));
2076 	}
2077 
2078 	TAVOR_TNF_EXIT(tavor_port_modify);
2079 	return (DDI_SUCCESS);
2080 }
2081 
2082 
2083 /*
2084  * tavor_set_addr_path()
2085  *    Context: Can be called from interrupt or base context.
2086  *
2087  * Note: This routine is used for two purposes.  It is used to fill in the
2088  * Tavor UDAV fields, and it is used to fill in the address path information
2089  * for QPs.  Because the two Tavor structures are similar, common fields can
2090  * be filled in here.  Because they are slightly different, however, we pass
2091  * an additional flag to indicate which type is being filled.
2092  */
2093 int
2094 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av,
2095     tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp)
2096 {
2097 	uint_t		gidtbl_sz;
2098 
2099 	TAVOR_TNF_ENTER(tavor_set_addr_path);
2100 
2101 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2102 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2103 
2104 	path->ml_path	= av->av_src_path;
2105 	path->rlid	= av->av_dlid;
2106 	path->sl	= av->av_srvl;
2107 
2108 	/* Port number only valid (in "av_port_num") if this is a UDAV */
2109 	if (type == TAVOR_ADDRPATH_UDAV) {
2110 		path->portnum = av->av_port_num;
2111 	}
2112 
2113 	/*
2114 	 * Validate (and fill in) static rate.
2115 	 *
2116 	 * The stat_rate_sup is used to decide how to set the rate and
2117 	 * if it is zero, the driver uses the old interface.
2118 	 */
2119 	if (state->ts_devlim.stat_rate_sup) {
2120 		if (av->av_srate == IBT_SRATE_20) {
2121 			path->max_stat_rate = 0; /* 4x@DDR injection rate */
2122 		} else if (av->av_srate == IBT_SRATE_5) {
2123 			path->max_stat_rate = 3; /* 1x@DDR injection rate */
2124 		} else if (av->av_srate == IBT_SRATE_10) {
2125 			path->max_stat_rate = 2; /* 4x@SDR injection rate */
2126 		} else if (av->av_srate == IBT_SRATE_2) {
2127 			path->max_stat_rate = 1; /* 1x@SDR injection rate */
2128 		} else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2129 			path->max_stat_rate = 0; /* Max */
2130 		} else {
2131 			TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2132 			    TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2133 			TAVOR_TNF_EXIT(tavor_set_addr_path);
2134 			return (IBT_STATIC_RATE_INVALID);
2135 		}
2136 	} else {
2137 		if (av->av_srate == IBT_SRATE_10) {
2138 			path->max_stat_rate = 0; /* 4x@SDR injection rate */
2139 		} else if (av->av_srate == IBT_SRATE_2) {
2140 			path->max_stat_rate = 1; /* 1x@SDR injection rate */
2141 		} else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2142 			path->max_stat_rate = 0; /* Max */
2143 		} else {
2144 			TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2145 			    TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2146 			TAVOR_TNF_EXIT(tavor_set_addr_path);
2147 			return (IBT_STATIC_RATE_INVALID);
2148 		}
2149 	}
2150 
2151 	/*
2152 	 * If this is a QP operation save asoft copy.
2153 	 */
2154 	if (qp) {
2155 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2156 		qp->qp_save_srate = av->av_srate;
2157 	}
2158 
2159 	/* If "grh" flag is set, then check for valid SGID index too */
2160 	gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2161 	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2162 		TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail,
2163 		    TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix);
2164 		TAVOR_TNF_EXIT(tavor_set_addr_path);
2165 		return (IBT_SGID_INVALID);
2166 	}
2167 
2168 	/*
2169 	 * Fill in all "global" values regardless of the value in the GRH
2170 	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
2171 	 * hardware will ignore the other "global" values as necessary.  Note:
2172 	 * SW does this here to enable later query operations to return
2173 	 * exactly the same params that were passed when the addr path was
2174 	 * last written.
2175 	 */
2176 	path->grh = av->av_send_grh;
2177 	if (type == TAVOR_ADDRPATH_QP) {
2178 		path->mgid_index = av->av_sgid_ix;
2179 	} else {
2180 		/*
2181 		 * For Tavor UDAV, the "mgid_index" field is the index into
2182 		 * a combined table (not a per-port table). So some extra
2183 		 * calculations are necessary.
2184 		 */
2185 		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2186 		    av->av_sgid_ix;
2187 	}
2188 	path->flow_label = av->av_flow;
2189 	path->tclass	 = av->av_tclass;
2190 	path->hop_limit	 = av->av_hop;
2191 	path->rgid_h	 = av->av_dgid.gid_prefix;
2192 
2193 	/*
2194 	 * According to Tavor PRM, the (31:0) part of rgid_l must be set to
2195 	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
2196 	 * only need to do it for UDAV's.  So we enforce that here.
2197 	 *
2198 	 * NOTE: The entire 64 bits worth of GUID info is actually being
2199 	 * preserved (for UDAVs) by the callers of this function
2200 	 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the
2201 	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2202 	 * "don't care".
2203 	 */
2204 	if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) {
2205 		path->rgid_l = av->av_dgid.gid_guid;
2206 	} else {
2207 		path->rgid_l = 0x2;
2208 	}
2209 
2210 	TAVOR_TNF_EXIT(tavor_set_addr_path);
2211 	return (DDI_SUCCESS);
2212 }
2213 
2214 
2215 /*
2216  * tavor_get_addr_path()
2217  *    Context: Can be called from interrupt or base context.
2218  *
2219  * Note: Just like tavor_set_addr_path() above, this routine is used for two
2220  * purposes.  It is used to read in the Tavor UDAV fields, and it is used to
2221  * read in the address path information for QPs.  Because the two Tavor
2222  * structures are similar, common fields can be read in here.  But because
2223  * they are slightly different, we pass an additional flag to indicate which
2224  * type is being read.
2225  */
2226 void
2227 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path,
2228     ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp)
2229 {
2230 	uint_t		gidtbl_sz;
2231 
2232 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2233 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2234 
2235 	av->av_src_path	= path->ml_path;
2236 	av->av_port_num	= path->portnum;
2237 	av->av_dlid	= path->rlid;
2238 	av->av_srvl	= path->sl;
2239 
2240 	/*
2241 	 * Set "av_ipd" value from max_stat_rate.
2242 	 */
2243 	if (qp) {
2244 		/*
2245 		 * If a QP operation use the soft copy
2246 		 */
2247 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2248 		av->av_srate = qp->qp_save_srate;
2249 	} else {
2250 		/*
2251 		 * The stat_rate_sup is used to decide how the srate value is
2252 		 * set and
2253 		 * if it is zero, the driver uses the old interface.
2254 		 */
2255 		if (state->ts_devlim.stat_rate_sup) {
2256 			if (path->max_stat_rate	== 0) {
2257 				av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */
2258 			} else if (path->max_stat_rate	== 1) {
2259 				av->av_srate = IBT_SRATE_2;  /* 1x@SDR rate */
2260 			} else if (path->max_stat_rate	== 2) {
2261 				av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2262 			} else if (path->max_stat_rate	== 3) {
2263 				av->av_srate = IBT_SRATE_5;  /* 1xDDR rate */
2264 			}
2265 		} else {
2266 			if (path->max_stat_rate	== 0) {
2267 				av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2268 			} else if (path->max_stat_rate	== 1) {
2269 				av->av_srate = IBT_SRATE_2;  /* 1x@SDR rate */
2270 			}
2271 		}
2272 	}
2273 
2274 	/*
2275 	 * Extract all "global" values regardless of the value in the GRH
2276 	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
2277 	 * knows to ignore the other "global" values as necessary.  Note: SW
2278 	 * does it this way to enable these query operations to return exactly
2279 	 * the same params that were passed when the addr path was last written.
2280 	 */
2281 	av->av_send_grh		= path->grh;
2282 	if (type == TAVOR_ADDRPATH_QP) {
2283 		av->av_sgid_ix  = path->mgid_index;
2284 	} else {
2285 		/*
2286 		 * For Tavor UDAV, the "mgid_index" field is the index into
2287 		 * a combined table (not a per-port table). So some extra
2288 		 * calculations are necessary.
2289 		 */
2290 		gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2291 		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2292 		    gidtbl_sz);
2293 	}
2294 	av->av_flow		= path->flow_label;
2295 	av->av_tclass		= path->tclass;
2296 	av->av_hop		= path->hop_limit;
2297 	av->av_dgid.gid_prefix	= path->rgid_h;
2298 	av->av_dgid.gid_guid	= path->rgid_l;
2299 }
2300 
2301 
2302 /*
2303  * tavor_portnum_is_valid()
2304  *    Context: Can be called from interrupt or base context.
2305  */
2306 int
2307 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum)
2308 {
2309 	uint_t	max_port;
2310 
2311 	max_port = state->ts_cfg_profile->cp_num_ports;
2312 	if ((portnum <= max_port) && (portnum != 0)) {
2313 		return (1);
2314 	} else {
2315 		return (0);
2316 	}
2317 }
2318 
2319 
2320 /*
2321  * tavor_pkeyindex_is_valid()
2322  *    Context: Can be called from interrupt or base context.
2323  */
2324 int
2325 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx)
2326 {
2327 	uint_t	max_pkeyindx;
2328 
2329 	max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl;
2330 	if (pkeyindx < max_pkeyindx) {
2331 		return (1);
2332 	} else {
2333 		return (0);
2334 	}
2335 }
2336 
2337 
2338 /*
2339  * tavor_queue_alloc()
2340  *    Context: Can be called from interrupt or base context.
2341  */
2342 int
2343 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info,
2344     uint_t sleepflag)
2345 {
2346 	ddi_dma_attr_t		dma_attr;
2347 	int			(*callback)(caddr_t);
2348 	uint64_t		realsize, alloc_mask;
2349 	uint_t			dma_xfer_mode, type;
2350 	int			flag, status;
2351 
2352 	TAVOR_TNF_ENTER(tavor_queue_alloc);
2353 
2354 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2355 
2356 	/* Set the callback flag appropriately */
2357 	callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP :
2358 	    DDI_DMA_DONTWAIT;
2359 
2360 	/*
2361 	 * Initialize many of the default DMA attributes.  Then set additional
2362 	 * alignment restrictions as necessary for the queue memory.  Also
2363 	 * respect the configured value for IOMMU bypass
2364 	 */
2365 	tavor_dma_attr_init(&dma_attr);
2366 	dma_attr.dma_attr_align = qa_info->qa_bind_align;
2367 	type = state->ts_cfg_profile->cp_iommu_bypass;
2368 	if (type == TAVOR_BINDMEM_BYPASS) {
2369 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2370 	}
2371 
2372 	/* Allocate a DMA handle */
2373 	status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL,
2374 	    &qa_info->qa_dmahdl);
2375 	if (status != DDI_SUCCESS) {
2376 		TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, "");
2377 		TAVOR_TNF_EXIT(tavor_queue_alloc);
2378 		return (DDI_FAILURE);
2379 	}
2380 
2381 	/*
2382 	 * Determine the amount of memory to allocate, depending on the values
2383 	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
2384 	 * to solve here is that allocating a DMA handle with IOMMU bypass
2385 	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2386 	 * that are less than the page size.  Since we may need stricter
2387 	 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in
2388 	 * Tavor QP work queue memory allocation), we use the following method
2389 	 * to calculate how much additional memory to request, and we enforce
2390 	 * our own alignment on the allocated result.
2391 	 */
2392 	alloc_mask = qa_info->qa_alloc_align - 1;
2393 	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2394 		realsize = qa_info->qa_size;
2395 	} else {
2396 		realsize = qa_info->qa_size + alloc_mask;
2397 	}
2398 
2399 	/*
2400 	 * If we are to allocate the queue from system memory, then use
2401 	 * ddi_dma_mem_alloc() to find the space.  Otherwise, if we are to
2402 	 * allocate the queue from locally-attached DDR memory, then use the
2403 	 * vmem allocator to find the space.  In either case, return a pointer
2404 	 * to the memory range allocated (including any necessary alignment
2405 	 * adjustments), the "real" memory pointer, the "real" size, and a
2406 	 * ddi_acc_handle_t to use when reading from/writing to the memory.
2407 	 */
2408 	if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2409 
2410 		/*
2411 		 * Determine whether to map STREAMING or CONSISTENT.  This is
2412 		 * based on the value set in the configuration profile at
2413 		 * attach time.
2414 		 */
2415 		dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
2416 
2417 		/* Allocate system memory for the queue */
2418 		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2419 		    &state->ts_reg_accattr, dma_xfer_mode, callback, NULL,
2420 		    (caddr_t *)&qa_info->qa_buf_real,
2421 		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2422 		if (status != DDI_SUCCESS) {
2423 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2424 			TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail,
2425 			    TAVOR_TNF_ERROR, "");
2426 			TAVOR_TNF_EXIT(tavor_queue_alloc);
2427 			return (DDI_FAILURE);
2428 		}
2429 
2430 		/*
2431 		 * Save temporary copy of the real pointer.  (This may be
2432 		 * modified in the last step below).
2433 		 */
2434 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2435 
2436 	} else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2437 
2438 		/* Allocate userland mappable memory for the queue */
2439 		flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP :
2440 		    DDI_UMEM_NOSLEEP;
2441 		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2442 		    &qa_info->qa_umemcookie);
2443 		if (qa_info->qa_buf_real == NULL) {
2444 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2445 			TNF_PROBE_0(tavor_queue_alloc_umem_fail,
2446 			    TAVOR_TNF_ERROR, "");
2447 			TAVOR_TNF_EXIT(tavor_queue_alloc);
2448 			return (DDI_FAILURE);
2449 		}
2450 
2451 		/*
2452 		 * Save temporary copy of the real pointer.  (This may be
2453 		 * modified in the last step below).
2454 		 */
2455 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2456 
2457 	} else {  /* TAVOR_QUEUE_LOCATION_INDDR */
2458 
2459 		/* Allocate DDR memory for the queue */
2460 		flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP;
2461 		qa_info->qa_buf_real = (uint32_t *)vmem_xalloc(
2462 		    state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0,
2463 		    NULL, NULL, flag);
2464 		if (qa_info->qa_buf_real == NULL) {
2465 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2466 			TNF_PROBE_0(tavor_queue_alloc_vmxa_fail,
2467 			    TAVOR_TNF_ERROR, "");
2468 			TAVOR_TNF_EXIT(tavor_queue_alloc);
2469 			return (DDI_FAILURE);
2470 		}
2471 
2472 		/*
2473 		 * Since "qa_buf_real" will be a PCI address (the offset into
2474 		 * the DDR memory), we first need to do some calculations to
2475 		 * convert it to its kernel mapped address.  (Note: This may
2476 		 * be modified again below, when any additional "alloc"
2477 		 * alignment constraint is applied).
2478 		 */
2479 		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2480 		    state->ts_reg_ddr_baseaddr) + ((uintptr_t)
2481 		    qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr));
2482 		qa_info->qa_buf_realsz	= realsize;
2483 		qa_info->qa_acchdl	= state->ts_reg_ddrhdl;
2484 	}
2485 
2486 	/*
2487 	 * The last step is to ensure that the final address ("qa_buf_aligned")
2488 	 * has the appropriate "alloc" alignment restriction applied to it
2489 	 * (if necessary).
2490 	 */
2491 	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2492 		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2493 		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2494 	}
2495 
2496 	TAVOR_TNF_EXIT(tavor_queue_alloc);
2497 	return (DDI_SUCCESS);
2498 }
2499 
2500 
2501 /*
2502  * tavor_queue_free()
2503  *    Context: Can be called from interrupt or base context.
2504  */
2505 void
2506 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info)
2507 {
2508 	TAVOR_TNF_ENTER(tavor_queue_free);
2509 
2510 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2511 
2512 	/*
2513 	 * Depending on how (i.e. from where) we allocated the memory for
2514 	 * this queue, we choose the appropriate method for releasing the
2515 	 * resources.
2516 	 */
2517 	if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2518 
2519 		ddi_dma_mem_free(&qa_info->qa_acchdl);
2520 
2521 	} else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2522 
2523 		ddi_umem_free(qa_info->qa_umemcookie);
2524 
2525 	} else {  /* TAVOR_QUEUE_LOCATION_INDDR */
2526 
2527 		vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real,
2528 		    qa_info->qa_buf_realsz);
2529 	}
2530 
2531 	/* Always free the dma handle */
2532 	ddi_dma_free_handle(&qa_info->qa_dmahdl);
2533 
2534 	TAVOR_TNF_EXIT(tavor_queue_free);
2535 }
2536 
2537 
2538 /*
2539  * tavor_dmaattr_get()
2540  *    Context: Can be called from interrupt or base context.
2541  */
2542 void
2543 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr)
2544 {
2545 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
2546 
2547 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
2548 	dma_attr->dma_attr_addr_lo	= 0;
2549 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
2550 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
2551 	dma_attr->dma_attr_align	= 1;
2552 	dma_attr->dma_attr_burstsizes	= 0x3FF;
2553 	dma_attr->dma_attr_minxfer	= 1;
2554 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
2555 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
2556 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
2557 	dma_attr->dma_attr_granular	= 1;
2558 	dma_attr->dma_attr_flags	= 0;
2559 }
2560