1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 /*
28 * This file implements the MAD receive logic in IBMF.
29 */
30
31 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
32 #include <sys/ib/mgt/ibmf/ibmf_saa_impl.h>
33
34 extern ibmf_state_t *ibmf_statep;
35 extern int ibmf_recv_wqes_per_port;
36 extern int ibmf_send_wqes_posted_per_qp;
37 extern int ibmf_recv_wqes_posted_per_qp;
38
/*
 * Recover the receive WQE context pointer from a work request ID by
 * clearing the IBMF_RCV_CQE bits of the ID.  Expands to an assignment
 * statement; "ptr" must be an lvalue of pointer type.
 */
#define	IBMF_RECV_WR_ID_TO_ADDR(id, ptr)	 \
	(ptr) = (void *)(uintptr_t)((uint64_t)(id) & ~IBMF_RCV_CQE)

/* QP numbers; IBMF_QP1_NUM is compared against iq_qp_num in this file */
#define	IBMF_QP0_NUM			0
#define	IBMF_QP1_NUM			1
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the request/response bit of the BM MAD attribute
 * modifier and its "response" value -- confirm against the users.
 */
#define	IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT	0x00000001
#define	IBMF_BM_MAD_ATTR_MOD_RESP		0x1
46
/*
 * Structure definition of entries in the module names table.
 * Each entry pairs a client module name with an IBMF client type.
 */
typedef struct _ibmf_mod_names_t {
	char			mod_name[8];	/* module name string */
	ibmf_client_type_t	mgt_class;	/* IBMF client type */
} ibmf_mod_names_t;
54
/*
 * Argument bundle handed to the ibmf_module_load() taskq routine by
 * ibmf_i_handle_recv_completion() when a MAD arrives for a class that
 * has no registered client.
 */
typedef struct _ibmf_mod_load_args_t {
	ibmf_ci_t		*cip;		/* CI context of the MAD */
	ibmf_recv_wqe_t		*recv_wqep;	/* receive WQE holding it */
	char			*modname;	/* client module name */
	ibmf_client_type_t	ibmf_class;	/* client type of the MAD */
} ibmf_mod_load_args_t;
61
62 extern int ibmf_trace_level;
63 extern int ibmf_send_wqes_posted_per_qp;
64 extern int ibmf_recv_wqes_posted_per_qp;
65
/*
 * Forward declarations of the file-local routines.
 */

/* Receive processing for one WQE; run from a taskq or called directly */
static void ibmf_i_do_recv_cb(void *taskq_arg);
/* Give a receive WQE back to the RQ; used on every drop path */
static int ibmf_i_repost_recv_buffer(ibmf_ci_t *cip,
    ibmf_recv_wqe_t *recv_wqep);
/* Derive the IBMF client type for a MAD; IBMF_SUCCESS when it is known */
static int ibmf_i_get_class(ib_mad_hdr_t *madhdrp,
    ibmf_qp_handle_t dest_ibmf_qp_handle, ib_lid_t slid,
    ibmf_client_type_t *dest_classp);
/* Non-RMPP processing of a received MAD for the given message context */
static void ibmf_i_handle_non_rmpp(ibmf_client_t *clientp,
    ibmf_msg_impl_t *msgimplp, uchar_t *mad);
/* Build the client module name for a management class into "modname" */
static void ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class,
    char *modname);
/* Taskq routine; its argument is an ibmf_mod_load_args_t pointer */
static void ibmf_module_load(void *taskq_arg);
/* NOTE(review): definition not visible here; presumably replies "busy" */
static void ibmf_send_busy(ibmf_mod_load_args_t *modlargsp);
78
/*
 * Client type classification helpers.
 *
 * The 0x000F0000 nibble of an ibmf_client_type_t value is compared with
 * IBMF_AGENT_ID, IBMF_MANAGER_ID or IBMF_AGENT_MANAGER_ID respectively.
 * IS_MANDATORY_CLASS() is true for the PERF_AGENT and BM_AGENT types.
 *
 * The argument is parenthesized in every expansion so that an expression
 * argument (for example "a | b") is evaluated as a whole before the mask
 * or the comparison is applied.
 */
#define	AGENT_CLASS(class)					\
	((((class) & 0x000F0000) == IBMF_AGENT_ID))
#define	MANAGER_CLASS(class)					\
	((((class) & 0x000F0000) == IBMF_MANAGER_ID))
#define	AGENT_MANAGER_CLASS(class)				\
	((((class) & 0x000F0000) == IBMF_AGENT_MANAGER_ID))
#define	IS_MANDATORY_CLASS(class)				\
	(((class) == PERF_AGENT) || ((class) == BM_AGENT))
87
/*
 * Buffer that ibmf_i_handle_recv_completion() fills with the client
 * module name (via ibmf_get_mod_name()) and whose address it passes to
 * the ibmf_module_load() taskq routine.
 * NOTE(review): this is a single file-scope buffer; if two completions
 * for unregistered classes can be processed concurrently, the name may
 * be overwritten before the taskq thread reads it -- confirm.
 */
char 	ibmf_client_modname[16];
89
90 /*
91 * ibmf_i_handle_recv_completion():
92 * Process the WQE from the RQ, obtain the management class of the
93 * packet and retrieve the corresponding client context
94 */
95 void
ibmf_i_handle_recv_completion(ibmf_ci_t * cip,ibt_wc_t * wcp)96 ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp)
97 {
98 int ret;
99 ibmf_client_type_t class;
100 ibmf_client_t *clientp;
101 ib_mad_hdr_t *madhdrp;
102 ibmf_recv_wqe_t *recv_wqep;
103 ibt_recv_wr_t *rwrp;
104 ibmf_qp_handle_t ibmf_qp_handle;
105 struct kmem_cache *kmem_cachep;
106 ibmf_alt_qp_t *altqp;
107
108 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
109 ibmf_i_handle_recv_completion_start, IBMF_TNF_TRACE, "",
110 "ibmf_i_handle_recv_completion() enter, cip = %p, wcp = %p\n",
111 tnf_opaque, cip, cip, tnf_opaque, wcp, wcp);
112
113 mutex_enter(&cip->ci_ud_dest_list_mutex);
114 if (cip->ci_ud_dest_list_count < IBMF_UD_DEST_LO_WATER_MARK) {
115 ret = ibmf_ud_dest_tq_disp(cip);
116 if (ret == 0) {
117 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
118 ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
119 "", "ibmf_i_handle_recv_completion(): %s\n",
120 tnf_string, msg, "taskq dispatch of ud_dest "
121 "population thread failed");
122 }
123 }
124 mutex_exit(&cip->ci_ud_dest_list_mutex);
125
126 ASSERT(IBMF_IS_RECV_WR_ID(wcp->wc_id));
127 IBMF_RECV_WR_ID_TO_ADDR(wcp->wc_id, recv_wqep);
128
129 rwrp = &recv_wqep->recv_wr;
130
131 /* Retrieve the QP handle from the receive WQE context */
132 ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
133
134 /* Get the WQE kmem cache pointer based on the QP type */
135 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
136 kmem_cachep = cip->ci_recv_wqes_cache;
137 } else {
138 altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
139 kmem_cachep = altqp->isq_recv_wqes_cache;
140 }
141
142 /*
143 * if the wqe is being flushed due to shutting down of the qp, free
144 * the wqe and return.
145 */
146 if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
147 kmem_free(rwrp->wr_sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
148 sizeof (ibt_wr_ds_t));
149 kmem_cache_free(kmem_cachep, recv_wqep);
150 mutex_enter(&cip->ci_mutex);
151 IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
152 mutex_exit(&cip->ci_mutex);
153 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
154 mutex_enter(&cip->ci_mutex);
155 cip->ci_wqes_alloced--;
156 if (cip->ci_wqes_alloced == 0)
157 cv_signal(&cip->ci_wqes_cv);
158 mutex_exit(&cip->ci_mutex);
159 } else {
160 mutex_enter(&altqp->isq_mutex);
161 altqp->isq_wqes_alloced--;
162 if (altqp->isq_wqes_alloced == 0)
163 cv_signal(&altqp->isq_wqes_cv);
164 mutex_exit(&altqp->isq_mutex);
165 }
166 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
167 ibmf_i_handle_recv_completion, IBMF_TNF_TRACE,
168 "", "ibmf_i_handle_recv_completion(): %s\n",
169 tnf_string, msg, "recv wqe flushed");
170 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
171 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
172 "", "ibmf_i_handle_recv_completion() exit\n");
173 return;
174 }
175
176 /*
177 * Dynamic Posting of WQEs to the Receive Queue (RQ) of the QP:
178 * If the number of RQ WQEs posted to the QP drops below half
179 * the initial number of RQ WQEs posted to the QP, then, one additional
180 * WQE is posted to the RQ of the QP while processing this CQE.
181 */
182 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
183 ibmf_qp_t *qpp = recv_wqep->recv_qpp;
184
185 mutex_enter(&qpp->iq_mutex);
186 qpp->iq_rwqes_posted--;
187 if (qpp->iq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
188 mutex_exit(&qpp->iq_mutex);
189
190 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
191 ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
192 "ibmf_i_handle_recv_compl(): %s, "
193 "QP# = %d\n", tnf_string, msg,
194 "Posting more RQ WQEs",
195 tnf_int, qpnum, qpp->iq_qp_num);
196
197 /* Post an additional WQE to the RQ */
198 ret = ibmf_i_post_recv_buffer(cip, qpp,
199 B_FALSE, ibmf_qp_handle);
200 if (ret != IBMF_SUCCESS) {
201 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
202 ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
203 "", "ibmf_i_handle_recv_compl(): %s, "
204 "status = %d\n", tnf_string, msg,
205 "ibmf_i_post_recv_buffer() failed",
206 tnf_int, status, ret);
207 }
208
209 mutex_enter(&qpp->iq_mutex);
210 }
211 mutex_exit(&qpp->iq_mutex);
212 } else {
213 mutex_enter(&altqp->isq_mutex);
214 altqp->isq_rwqes_posted--;
215 if (altqp->isq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
216 mutex_exit(&altqp->isq_mutex);
217
218 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
219 ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
220 "ibmf_i_handle_recv_compl(): %s, "
221 "QP# = %d\n", tnf_string, msg,
222 "Posting more RQ WQEs",
223 tnf_int, qpnum, altqp->isq_qpn);
224
225 /* Post an additional WQE to the RQ */
226 ret = ibmf_i_post_recv_buffer(cip, NULL,
227 B_FALSE, ibmf_qp_handle);
228 if (ret != IBMF_SUCCESS) {
229 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
230 ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
231 "", "ibmf_i_handle_recv_compl(): %s, "
232 "status = %d\n", tnf_string, msg,
233 "ibmf_i_post_recv_buffer() failed",
234 tnf_int, status, ret);
235 }
236
237 mutex_enter(&altqp->isq_mutex);
238 }
239 mutex_exit(&altqp->isq_mutex);
240 }
241
242 /*
243 * for all other completion errors, repost the wqe, and if that
244 * fails, free the wqe and return.
245 */
246 if (wcp->wc_status != IBT_WC_SUCCESS) {
247 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
248 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
249 ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
250 "", "ibmf_i_handle_recv_completion(): %s, wc_status = %d\n",
251 tnf_string, msg, "bad completion status received",
252 tnf_uint, wc_status, wcp->wc_status);
253 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
254 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
255 "", "ibmf_i_handle_recv_completion() exit\n");
256 return;
257 }
258
259 /* find the client corresponding to this recv cqe */
260 madhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
261 sizeof (ib_grh_t));
262
263 /* drop packet if MAD Base Version is not as expected */
264 if (madhdrp->BaseVersion != MAD_CLASS_BASE_VERS_1) {
265 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
266 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
267 ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
268 "", "ibmf_i_handle_recv_completion(): %s\n",
269 tnf_string, msg, "bad MAD version");
270 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
271 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
272 "", "ibmf_i_handle_recv_completion() exit\n");
273 return;
274 }
275
276 if (ibmf_i_get_class(madhdrp, recv_wqep->recv_ibmf_qp_handle,
277 wcp->wc_slid, &class) != IBMF_SUCCESS) {
278 /* bad class & type? */
279 #ifdef DEBUG
280 ibmf_i_dump_wcp(cip, wcp, recv_wqep);
281 #endif
282 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
283 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
284 ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
285 "", "ibmf_i_handle_recv_completion(): %s\n",
286 tnf_string, msg, "bad class/type");
287 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
288 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
289 "", "ibmf_i_handle_recv_completion() exit\n");
290 return;
291 }
292
293 ret = ibmf_i_lookup_client_by_mgmt_class(cip, recv_wqep->recv_port_num,
294 class, &clientp);
295 if (ret == IBMF_SUCCESS) {
296 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
297 recv_wqep->recv_client = clientp;
298 recv_wqep->recv_wc = *wcp; /* struct copy */
299
300 /*
301 * Increment the kstats for the number of active receiver side
302 * callbacks
303 */
304 mutex_enter(&clientp->ic_kstat_mutex);
305 IBMF_ADD32_KSTATS(clientp, recv_cb_active, 1);
306 mutex_exit(&clientp->ic_kstat_mutex);
307
308 if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
309 /* Dispatch the taskq thread to do further processing */
310 ret = taskq_dispatch(clientp->ic_recv_taskq,
311 ibmf_i_do_recv_cb, recv_wqep, TQ_NOSLEEP);
312 if (ret == 0) {
313 mutex_enter(&clientp->ic_kstat_mutex);
314 IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
315 mutex_exit(&clientp->ic_kstat_mutex);
316 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
317 ibmf_i_handle_recv_completion_err,
318 IBMF_TNF_ERROR, "",
319 "ibmf_i_handle_recv_completion(): %s\n",
320 tnf_string, msg, "dispatch failed");
321 (void) ibmf_i_repost_recv_buffer(cip,
322 recv_wqep);
323 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
324 ibmf_i_handle_recv_completion_end,
325 IBMF_TNF_TRACE, "",
326 "ibmf_i_handle_recv_completion() exit\n");
327 return;
328 }
329 } else {
330 ibmf_i_do_recv_cb((void *)recv_wqep);
331 }
332
333 /*
334 * Decrement the kstats for the number of active receiver side
335 * callbacks
336 */
337 mutex_enter(&clientp->ic_kstat_mutex);
338 IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
339 mutex_exit(&clientp->ic_kstat_mutex);
340
341 } else {
342 /*
343 * A client has not registered to receive MADs of this
344 * management class. IBMF must attempt to load the
345 * client and request a resend of the request MAD.
346 * The name of the client MAD is derived using a
347 * convention described in PSARC case 2003/753.
348 */
349
350 ibmf_mod_load_args_t *modlargsp;
351
352 /*
353 * HCA driver handles the Performance management
354 * class MAD's. It registers with the IBMF during early
355 * boot and unregisters during detach and during
356 * HCA unconfigure operation. We come here
357 * 1. Before HCA registers with IBMF
358 * Drop the MAD. Since this is a UD MAD,
359 * sender will resend the request
360 * 2. After HCA unregistered with IBMF during DR operation.
361 * Since HCA is going away, we can safely drop the PMA
362 * MAD's here.
363 * Solaris does not support BM_AGENT and so drop the BM MAD's
364 */
365 if ((class == PERF_AGENT) || (class == BM_AGENT)) {
366 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
367 return;
368 }
369
370 recv_wqep->recv_wc = *wcp; /* struct copy */
371
372 IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L4,
373 ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR, "",
374 "ibmf_i_handle_recv_completion(): %s, port = %d, "
375 "class = 0x%x\n",
376 tnf_string, msg, "no client registered", tnf_uint, port,
377 recv_wqep->recv_port_num, tnf_opaque, class, class);
378
379 /* Construct the IBMF client module name */
380 ibmf_get_mod_name(madhdrp->MgmtClass, class,
381 ibmf_client_modname);
382
383 /* Load the module using a taskq thread */
384 modlargsp = (ibmf_mod_load_args_t *)kmem_zalloc(
385 sizeof (ibmf_mod_load_args_t), KM_NOSLEEP);
386 if (modlargsp != NULL) {
387 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*modlargsp))
388 modlargsp->cip = cip;
389 modlargsp->recv_wqep = recv_wqep;
390 modlargsp->modname = ibmf_client_modname;
391 modlargsp->ibmf_class = class;
392 ret = taskq_dispatch(ibmf_statep->ibmf_taskq,
393 ibmf_module_load, modlargsp, TQ_NOSLEEP);
394 if (ret == 0) {
395 kmem_free(modlargsp,
396 sizeof (ibmf_mod_load_args_t));
397 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
398 ibmf_i_handle_recv_completion_error,
399 IBMF_TNF_TRACE, "",
400 "ibmf_i_handle_recv_completion(): Failed "
401 "to dispatch ibmf_module_load taskq\n");
402 (void) ibmf_i_repost_recv_buffer(cip,
403 recv_wqep);
404 }
405 } else {
406 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
407 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
408 "", "ibmf_i_handle_recv_completion(): "
409 "Failed to allocate memory for modlargs\n");
410 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
411 }
412 }
413
414 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
415 ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE, "",
416 "ibmf_i_handle_recv_completion() exit\n");
417 }
418
419 /*
420 * ibmf_i_do_recv_cb():
421 * This routine does the following:
422 * o looks for a message in the client's message list
423 * o creates a new message if one does not exist for unsolicited data
424 * o invoke routines to do specific handling for rmpp and non-rmpp cases
425 * o on a failure, the receive WQE is reposted to the RQ
426 */
427 static void
ibmf_i_do_recv_cb(void * taskq_arg)428 ibmf_i_do_recv_cb(void *taskq_arg)
429 {
430 ibt_wc_t *wcp;
431 ibmf_msg_impl_t *msgimplp;
432 ibmf_client_t *clientp;
433 ibmf_addr_info_t addrinfo;
434 ibmf_recv_wqe_t *recv_wqep;
435 ib_grh_t *ib_grh;
436 boolean_t grhpresent;
437 ibmf_qp_handle_t ibmf_qp_handle;
438 ib_mad_hdr_t *mad_hdr;
439 ibmf_rmpp_hdr_t *rmpp_hdr;
440 ibmf_alt_qp_t *qpp;
441 ib_gid_t gid;
442 ib_lid_t lid;
443 int msg_trans_state_flags, msg_flags;
444 uint_t ref_cnt;
445 timeout_id_t msg_rp_unset_id, msg_tr_unset_id;
446 timeout_id_t msg_rp_set_id, msg_tr_set_id;
447 int status;
448 saa_port_t *saa_portp;
449
450 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
451
452 /* The taskq_arg argument is a pointer to the receive WQE context */
453 recv_wqep = taskq_arg;
454
455 /* Retrieve the QP handle from the receive WQE context */
456 ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
457
458 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
459 ibmf_i_do_recv_cb_start, IBMF_TNF_TRACE, "",
460 "ibmf_i_do_recv_cb() enter, recv_wqep = %p\n",
461 tnf_opaque, recv_wqep, recv_wqep);
462
463 /* Retrieve the client context pointer from the receive WQE context */
464 clientp = recv_wqep->recv_client;
465
466 /* Get a pointer to the IBT work completion structure */
467 wcp = &recv_wqep->recv_wc;
468
469 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wcp))
470
471 /*
472 * Identify the port by the LID or GID depending on whether the
473 * Global Route Header is valid or not
474 */
475 if (wcp->wc_flags & IBT_WC_GRH_PRESENT) {
476 grhpresent = B_TRUE;
477 ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
478 gid.gid_prefix = b2h64(ib_grh->SGID.gid_prefix);
479 gid.gid_guid = b2h64(ib_grh->SGID.gid_guid);
480 } else {
481 grhpresent = B_FALSE;
482 lid = wcp->wc_slid;
483 }
484
485 /* Get a pointer to the MAD header */
486 mad_hdr = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
487 sizeof (ib_grh_t));
488
489 /* Get a pointer to the RMPP header */
490 rmpp_hdr = (ibmf_rmpp_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
491 sizeof (ib_grh_t) + sizeof (ib_mad_hdr_t));
492
493 IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
494 ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
495 "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ", class = 0x%x, "
496 "attrID = 0x%x, lid = 0x%x\n",
497 tnf_string, msg, "Received MAD", tnf_opaque, tid,
498 b2h64(mad_hdr->TransactionID), tnf_opaque, class,
499 mad_hdr->MgmtClass, tnf_opaque, attr_id,
500 b2h16(mad_hdr->AttributeID), tnf_opaque, remote_lid, lid);
501
502 /*
503 * Look for the matching message in the client's message list
504 * NOTE: if the message is found, the message reference count will
505 * have been increased by 1.
506 */
507 msgimplp = ibmf_i_find_msg(clientp, b2h64(mad_hdr->TransactionID),
508 mad_hdr->MgmtClass, mad_hdr->R_Method, lid, &gid, grhpresent,
509 rmpp_hdr, IBMF_REG_MSG_LIST);
510
511 /*
512 * If the message is not on the regular message list, search
513 * for it in the termination message list.
514 */
515 if (msgimplp == NULL) {
516 msgimplp = ibmf_i_find_msg(clientp,
517 b2h64(mad_hdr->TransactionID), mad_hdr->MgmtClass,
518 mad_hdr->R_Method, lid, &gid, grhpresent, rmpp_hdr,
519 IBMF_TERM_MSG_LIST);
520 }
521
522 if (msgimplp != NULL) {
523
524 /* if this packet is from the SA */
525 if (clientp->ic_client_info.client_class == SUBN_ADM_MANAGER) {
526
527 /*
528 * ibmf_saa's callback arg is its saa_portp;
529 * take advantage of this fact to quickly update the
530 * port's SA uptime. ibmf_saa uses the up time to
531 * determine if the SA is still alive
532 */
533 saa_portp = clientp->ic_async_cb_arg;
534
535 /* update the SA uptime */
536 mutex_enter(&saa_portp->saa_pt_mutex);
537
538 saa_portp->saa_pt_sa_uptime = gethrtime();
539
540 mutex_exit(&saa_portp->saa_pt_mutex);
541 }
542
543 mutex_enter(&msgimplp->im_mutex);
544
545 /*
546 * Clear timers for transactions of solicited incoming packets
547 */
548 if (msgimplp->im_rp_timeout_id != 0) {
549 ibmf_i_unset_timer(msgimplp, IBMF_RESP_TIMER);
550 }
551
552 /*
553 * If a MAD is received in the middle of an RMPP receive
554 * transaction, and the MAD's RMPPFlags.Active bit is 0,
555 * drop the MAD
556 */
557 if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
558 (msgimplp->im_flags & IBMF_MSG_FLAGS_RECV_RMPP) &&
559 ((rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE) == 0)) {
560 mutex_exit(&msgimplp->im_mutex);
561 (void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
562 recv_wqep);
563 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
564 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
565 "ibmf_i_do_recv_cb(): %s, msg = %p\n",
566 tnf_string, msg,
567 "Non-RMPP MAD received in RMPP transaction, "
568 "dropping MAD", tnf_opaque, msgimplp, msgimplp);
569 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
570 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
571 "ibmf_i_do_recv_cb() exit\n");
572 return;
573 }
574
575 /*
		 * If the message has been marked uninitialized or done
577 * release the message mutex and return
578 */
579 if ((msgimplp->im_trans_state_flags &
580 IBMF_TRANS_STATE_FLAG_DONE) ||
581 (msgimplp->im_trans_state_flags &
582 IBMF_TRANS_STATE_FLAG_UNINIT)) {
583 IBMF_MSG_DECR_REFCNT(msgimplp);
584 msg_trans_state_flags = msgimplp->im_trans_state_flags;
585 msg_flags = msgimplp->im_flags;
586 ref_cnt = msgimplp->im_ref_count;
587 mutex_exit(&msgimplp->im_mutex);
588 (void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
589 recv_wqep);
590 /*
591 * This thread may notify the client only if the
592 * transaction is done, the message has been removed
593 * from the client's message list, and the message
594 * reference count is 0.
595 * If the transaction is done, and the message reference
596 * count = 0, there is still a possibility that a
597 * packet could arrive for the message and its reference
598 * count increased if the message is still on the list.
599 * If the message is still on the list, it will be
600 * removed by a call to ibmf_i_client_rem_msg() at
601 * the completion point of the transaction.
602 * So, the reference count should be checked after the
603 * message has been removed.
604 */
605 if ((msg_trans_state_flags &
606 IBMF_TRANS_STATE_FLAG_DONE) &&
607 !(msg_flags & IBMF_MSG_FLAGS_ON_LIST) &&
608 (ref_cnt == 0)) {
609
610 ibmf_i_notify_sequence(clientp, msgimplp,
611 msg_flags);
612
613 }
614 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
615 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
616 "ibmf_i_do_recv_cb(): %s, msg = %p\n",
617 tnf_string, msg,
618 "Message already marked for removal, dropping MAD",
619 tnf_opaque, msgimplp, msgimplp);
620 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
621 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
622 "ibmf_i_do_recv_cb() exit\n");
623 return;
624 }
625 } else {
626 /* unsolicited message packet */
627
628 /*
629 * Check if the client context, the alternate QP context
630 * (if not the default QP), and the incoming MAD support RMPP
631 */
632 if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
633 (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
634
635 /* Only unsolicited packets should be data seg 1 */
636 if ((rmpp_hdr->rmpp_flags &
637 IBMF_RMPP_FLAGS_FIRST_PKT) == 0) {
638 (void) ibmf_i_repost_recv_buffer(
639 clientp->ic_myci, recv_wqep);
640 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
641 ibmf_i_do_recv_cb_error, IBMF_TNF_TRACE, "",
642 "ibmf_i_do_recv_cb(): %s\n",
643 tnf_string, msg,
644 "unsolicited rmpp packet not first packet");
645 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
646 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
647 "ibmf_i_do_recv_cb() exit\n");
648 return;
649 }
650 }
651
652 /*
653 * Before we alloc a message context, check to see if
654 * a callback has been registered with the client
655 * for this unsolicited message.
656 * If one has been registered, increment the recvs active
657 * count to get the teardown routine to wait until
658 * this callback is complete.
659 */
660 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
661
662 mutex_enter(&clientp->ic_mutex);
663 if (clientp->ic_recv_cb == NULL) {
664 mutex_exit(&clientp->ic_mutex);
665 (void) ibmf_i_repost_recv_buffer(
666 clientp->ic_myci, recv_wqep);
667 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
668 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
669 "ibmf_i_do_recv_cb(): %s, class %x\n",
670 tnf_string, msg,
671 "ibmf_tear_down_recv_cb already occurred",
672 tnf_opaque, class,
673 clientp->ic_client_info.client_class);
674 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
675 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
676 "ibmf_i_do_recv_cb() exit\n");
677 return;
678 }
679 IBMF_RECV_CB_SETUP(clientp);
680 mutex_exit(&clientp->ic_mutex);
681 } else {
682 qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
683
684 mutex_enter(&qpp->isq_mutex);
685 if (qpp->isq_recv_cb == NULL) {
686 mutex_exit(&qpp->isq_mutex);
687 (void) ibmf_i_repost_recv_buffer(
688 clientp->ic_myci, recv_wqep);
689 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
690 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
691 "ibmf_i_do_recv_cb(): %s, class %x\n",
692 tnf_string, msg,
693 "ibmf_tear_down_recv_cb already occurred",
694 tnf_opaque, class,
695 clientp->ic_client_info.client_class);
696 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
697 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
698 "ibmf_i_do_recv_cb() exit\n");
699 return;
700 }
701 IBMF_ALT_RECV_CB_SETUP(qpp);
702 mutex_exit(&qpp->isq_mutex);
703 }
704
705 /*
706 * Allocate a message context
707 */
708 msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(
709 sizeof (ibmf_msg_impl_t), KM_NOSLEEP);
710
711 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
712
713 /* If we cannot allocate memory, drop the packet and clean up */
714 if (msgimplp == NULL) {
715 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
716 mutex_enter(&clientp->ic_mutex);
717 IBMF_RECV_CB_CLEANUP(clientp);
718 mutex_exit(&clientp->ic_mutex);
719 } else {
720 qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
721 mutex_enter(&qpp->isq_mutex);
722 IBMF_ALT_RECV_CB_CLEANUP(qpp);
723 mutex_exit(&qpp->isq_mutex);
724 }
725 (void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
726 recv_wqep);
727 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
728 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
729 "ibmf_i_do_recv_cb(): %s\n", tnf_string, msg,
730 "mem allocation failure");
731 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
732 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
733 "ibmf_i_do_recv_cb() exit\n");
734 return;
735 }
736
737 /* Get the port's base LID if it's not in the client context */
738 if ((clientp->ic_base_lid == 0) &&
739 (clientp->ic_qp->iq_qp_num != 0)) {
740 (void) ibt_get_port_state_byguid(
741 clientp->ic_client_info.ci_guid,
742 clientp->ic_client_info.port_num, NULL,
743 &clientp->ic_base_lid);
744 if (clientp->ic_base_lid == 0) {
745 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
746 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
747 "ibmf_i_do_recv_cb(): %s\n",
748 tnf_string, msg, "base_lid is undefined");
749 }
750 }
751
752 /* Set up address information */
753 addrinfo.ia_local_lid = clientp->ic_base_lid +
754 wcp->wc_path_bits;
755 addrinfo.ia_remote_lid = wcp->wc_slid;
756 addrinfo.ia_remote_qno = wcp->wc_qpn;
757
		/* Get the pkey, including the correct partition membership */
759 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
760 if (recv_wqep->recv_qpp->iq_qp_num == IBMF_QP1_NUM) {
761
762 /*
763 * here too we expect the pkey index in the work
764 * completion belongs to a pkey in the pkey
765 * table
766 */
767 status = ibmf_i_pkey_ix_to_key(
768 clientp->ic_myci, recv_wqep->recv_port_num,
769 wcp->wc_pkey_ix, &addrinfo.ia_p_key);
770 if (status != IBMF_SUCCESS) {
771 IBMF_TRACE_2(IBMF_TNF_NODEBUG,
772 DPRINT_L1, ibmf_i_do_recv_cb_error,
773 IBMF_TNF_ERROR, "",
774 "ibmf_i_do_recv_cb(): "
775 "get_pkey failed for ix %d,"
776 "status = %d\n", tnf_uint,
777 pkeyix, wcp->wc_pkey_ix, tnf_uint,
778 ibmf_status, status);
779 mutex_enter(&clientp->ic_mutex);
780 IBMF_RECV_CB_CLEANUP(clientp);
781 mutex_exit(&clientp->ic_mutex);
782 (void) ibmf_i_repost_recv_buffer(
783 clientp->ic_myci, recv_wqep);
784 mutex_destroy(&msgimplp->im_mutex);
785 cv_destroy(&msgimplp->im_trans_cv);
786 kmem_free(msgimplp,
787 sizeof (ibmf_msg_impl_t));
788 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
789 ibmf_i_do_recv_cb_end,
790 IBMF_TNF_TRACE, "",
791 "ibmf_i_do_recv_cb() exit\n");
792 return;
793 }
794 }
795 addrinfo.ia_q_key = IBMF_MGMT_Q_KEY;
796 } else {
797 qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
798
799 /* For alternate QPs, the pkey is in the QP context */
800 mutex_enter(&qpp->isq_mutex);
801 addrinfo.ia_p_key = qpp->isq_pkey;
802 addrinfo.ia_q_key = qpp->isq_qkey;
803 mutex_exit(&qpp->isq_mutex);
804 }
805
806 addrinfo.ia_service_level = wcp->wc_sl;
807 msgimplp->im_local_addr = addrinfo;
808
809 /* Initialize the message context */
810 cv_init(&msgimplp->im_trans_cv, NULL, CV_DRIVER, NULL);
811 mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
812 msgimplp->im_client = clientp;
813 msgimplp->im_qp_hdl = ibmf_qp_handle;
814 msgimplp->im_flags = 0;
815 msgimplp->im_unsolicited = B_TRUE;
816 msgimplp->im_tid = b2h64(mad_hdr->TransactionID);
817 msgimplp->im_mgt_class = mad_hdr->MgmtClass;
818 msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
819 msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
820 msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;
821 msgimplp->im_retrans.retrans_trans_to =
822 IBMF_RETRANS_DEF_TRANS_TO;
823 msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
824 msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
825 IBMF_MSG_INCR_REFCNT(msgimplp);
826 msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
827
828 /*
829 * Initialize (and possibly allocate) the IBT UD destination
830 * address handle.
831 */
832 status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
833 &msgimplp->im_ud_dest, B_FALSE);
834 if (status != IBMF_SUCCESS) {
835 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
836 mutex_enter(&clientp->ic_mutex);
837 IBMF_RECV_CB_CLEANUP(clientp);
838 mutex_exit(&clientp->ic_mutex);
839 } else {
840 qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
841 mutex_enter(&qpp->isq_mutex);
842 IBMF_ALT_RECV_CB_CLEANUP(qpp);
843 mutex_exit(&qpp->isq_mutex);
844 }
845 (void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
846 recv_wqep);
847 mutex_destroy(&msgimplp->im_mutex);
848 cv_destroy(&msgimplp->im_trans_cv);
849 kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
850 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
851 ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
852 "ibmf_i_do_recv_cb(): %s, status = %d\n",
853 tnf_string, msg, "alloc ah failed", tnf_uint,
854 ibmf_status, status);
855 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
856 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
857 "ibmf_i_do_recv_cb() exit\n");
858 return;
859 }
860
861 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp))
862
863 /* add message to client's list */
864 ibmf_i_client_add_msg(clientp, msgimplp);
865
866 mutex_enter(&msgimplp->im_mutex);
867
868 /* no one should have touched our state */
869 ASSERT(msgimplp->im_trans_state_flags ==
870 IBMF_TRANS_STATE_FLAG_UNINIT);
871
872 /* transition out of uninit state */
873 msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;
874 }
875
876 /* fill in the grh with the contents of the recv wqe */
877 if (grhpresent == B_TRUE) {
878 uint32_t tmp32;
879
880 msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_GLOBAL_ADDRESS;
881 ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
882 msgimplp->im_global_addr.ig_sender_gid.gid_prefix =
883 b2h64(ib_grh->SGID.gid_prefix);
884 msgimplp->im_global_addr.ig_sender_gid.gid_guid =
885 b2h64(ib_grh->SGID.gid_guid);
886 msgimplp->im_global_addr.ig_recver_gid.gid_prefix =
887 b2h64(ib_grh->DGID.gid_prefix);
888 msgimplp->im_global_addr.ig_recver_gid.gid_guid =
889 b2h64(ib_grh->DGID.gid_guid);
890 /*
891 * swap to get byte order back to wire format on little endian
892 * systems so we can apply the GRH masks
893 */
894 tmp32 = b2h32(ib_grh->IPVer_TC_Flow);
895 msgimplp->im_global_addr.ig_flow_label =
896 tmp32 & IB_GRH_FLOW_LABEL_MASK;
897 msgimplp->im_global_addr.ig_tclass =
898 (tmp32 & IB_GRH_TCLASS_MASK) >> 20;
899 msgimplp->im_global_addr.ig_hop_limit =
900 ib_grh->HopLmt;
901 }
902
903 /* Perform RMPP or non-RMPP processing */
904 if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
905 (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
906 IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
907 ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
908 "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ","
909 "flags = 0x%x rmpp_type = %d, rmpp_segnum = %d\n",
910 tnf_string, msg, "Handling rmpp MAD",
911 tnf_opaque, tid, b2h64(mad_hdr->TransactionID),
912 tnf_opaque, flags, rmpp_hdr->rmpp_flags,
913 tnf_opaque, type, rmpp_hdr->rmpp_type,
914 tnf_opaque, segment, b2h32(rmpp_hdr->rmpp_segnum));
915
916 /*
		 * Set the RMPP state to "receiver active" on the first packet
		 * of every RMPP message, and initialize the
		 * expected segment to 1.
920 */
921 if ((msgimplp->im_rmpp_ctx.rmpp_state ==
922 IBMF_RMPP_STATE_UNDEFINED) &&
923 (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_FIRST_PKT)) {
924
925 msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
926
927 if (rmpp_hdr->rmpp_type == IBMF_RMPP_TYPE_DATA) {
928 msgimplp->im_rmpp_ctx.rmpp_state =
929 IBMF_RMPP_STATE_RECEVR_ACTIVE;
930
931 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
932 ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
933 "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
934 tnf_string, msg, "first RMPP pkt received",
935 tnf_opaque, msgimplp, msgimplp);
936 }
937
938 msgimplp->im_rmpp_ctx.rmpp_es = 1;
939 msgimplp->im_rmpp_ctx.rmpp_wl = 1;
940 msgimplp->im_rmpp_ctx.rmpp_wf = 1;
941
942 /* set double-sided transfer flag for certain methods */
943 if (mad_hdr->R_Method == SA_SUBN_ADM_GET_MULTI)
944 msgimplp->im_rmpp_ctx.rmpp_is_ds = B_TRUE;
945 else msgimplp->im_rmpp_ctx.rmpp_is_ds = B_FALSE;
946
947 msgimplp->im_trans_state_flags |=
948 IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
949 }
950
951 if (rmpp_hdr->rmpp_resp_time != IBMF_RMPP_DEFAULT_RRESPT) {
952 msgimplp->im_retrans.retrans_rtv =
953 1 << rmpp_hdr->rmpp_resp_time;
954
955 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
956 ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
957 "ibmf_i_do_recv_cb: %s, resp_time %d\n",
958 tnf_string, msg, "new resp time received",
959 tnf_uint, resp_time, rmpp_hdr->rmpp_resp_time);
960 }
961
962 ibmf_i_handle_rmpp(clientp, ibmf_qp_handle, msgimplp,
963 (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
964 sizeof (ib_grh_t)));
965 } else {
966
967 msgimplp->im_trans_state_flags |=
968 IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
969
970 ibmf_i_handle_non_rmpp(clientp, msgimplp,
971 (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
972 sizeof (ib_grh_t)));
973 }
974
975 msg_rp_unset_id = msg_tr_unset_id = msg_rp_set_id = msg_tr_set_id = 0;
976
977 /*
978 * Save the transaction state flags and the timeout IDs
979 * before releasing the mutex as they may be changed after that.
980 */
981 msg_trans_state_flags = msgimplp->im_trans_state_flags;
982 msg_flags = msgimplp->im_flags;
983 msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
984 msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
985 msgimplp->im_rp_unset_timeout_id = 0;
986 msgimplp->im_tr_unset_timeout_id = 0;
987
988 /*
989 * Decrement the message reference count
990 * This count was incremented either when the message was found
991 * on the client's message list (ibmf_i_find_msg()) or when
992 * a new message was created for unsolicited data
993 */
994 IBMF_MSG_DECR_REFCNT(msgimplp);
995
996 if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
997 if (msgimplp->im_rp_timeout_id != 0) {
998 msg_rp_set_id = msgimplp->im_rp_timeout_id;
999 msgimplp->im_rp_timeout_id = 0;
1000 }
1001 if (msgimplp->im_tr_timeout_id != 0) {
1002 msg_tr_set_id = msgimplp->im_tr_timeout_id;
1003 msgimplp->im_tr_timeout_id = 0;
1004 }
1005 }
1006
1007 mutex_exit(&msgimplp->im_mutex);
1008
1009 /*
1010 * Call untimeout() after releasing the lock because the
1011 * lock is acquired in the timeout handler as well. Untimeout()
1012 * does not return until the timeout handler has run, if it already
1013 * fired, which would result in a deadlock if we did not first
1014 * release the im_mutex lock.
1015 */
1016 if (msg_rp_unset_id != 0) {
1017 (void) untimeout(msg_rp_unset_id);
1018 }
1019
1020 if (msg_tr_unset_id != 0) {
1021 (void) untimeout(msg_tr_unset_id);
1022 }
1023
1024 if (msg_rp_set_id != 0) {
1025 (void) untimeout(msg_rp_set_id);
1026 }
1027
1028 if (msg_tr_set_id != 0) {
1029 (void) untimeout(msg_tr_set_id);
1030 }
1031
1032 /* Increment the kstats for number of messages received */
1033 mutex_enter(&clientp->ic_kstat_mutex);
1034 IBMF_ADD32_KSTATS(clientp, msgs_received, 1);
1035 mutex_exit(&clientp->ic_kstat_mutex);
1036
1037 /*
1038 * now that we are done gleaning all we want out of the receive
1039 * completion, we repost the receive request.
1040 */
1041 (void) ibmf_i_repost_recv_buffer(clientp->ic_myci, recv_wqep);
1042
1043 /*
1044 * If the transaction flags indicate a completed transaction,
1045 * notify the client
1046 */
1047 if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
1048 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
1049 ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
1050 "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
1051 tnf_string, msg, "notifying client",
1052 tnf_opaque, msgimplp, msgimplp);
1053
1054 /* Remove the message from the client's message list */
1055 ibmf_i_client_rem_msg(clientp, msgimplp, &ref_cnt);
1056
1057 /*
1058 * Notify the client if the message reference count is zero.
1059 * At this point, we know that the transaction is done and
1060 * the message has been removed from the client's message list.
1061 * So, we only need to make sure the reference count is zero
1062 * before notifying the client.
1063 */
1064 if (ref_cnt == 0) {
1065
1066 ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
1067
1068 }
1069 }
1070
1071 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1072 ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
1073 "ibmf_i_do_recv_cb() exit, msgimplp = %p\n",
1074 tnf_opaque, msgimplp, msgimplp);
1075 }
1076
1077 /*
1078 * ibmf_i_handle_non_rmpp():
1079 * Handle non-RMPP processing of an incoming IB packet
1080 */
1081 void
ibmf_i_handle_non_rmpp(ibmf_client_t * clientp,ibmf_msg_impl_t * msgimplp,uchar_t * mad)1082 ibmf_i_handle_non_rmpp(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
1083 uchar_t *mad)
1084 {
1085 ibmf_rmpp_ctx_t *rmpp_ctx = &msgimplp->im_rmpp_ctx;
1086 ib_mad_hdr_t *mad_hdr;
1087 size_t offset;
1088 uchar_t *msgbufp;
1089 uint32_t clhdrsz, clhdroff;
1090
1091 IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1092 ibmf_i_handle_non_rmpp_start, IBMF_TNF_TRACE, "",
1093 "ibmf_i_handle_non_rmpp(): clientp = 0x%p, "
1094 "msgp = 0x%p, madp = 0x%p\n", tnf_opaque, clientp, clientp,
1095 tnf_opaque, msg, msgimplp, tnf_opaque, mad, mad);
1096
1097 ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
1098
1099 /* Get the MAD header */
1100 mad_hdr = (ib_mad_hdr_t *)mad;
1101
1102 /* Determine the MAD's class header size */
1103 ibmf_i_mgt_class_to_hdr_sz_off(mad_hdr->MgmtClass, &clhdrsz, &clhdroff);
1104
1105 /* Allocate the message receive buffers if not already allocated */
1106 if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1107
1108 msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
1109 (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
1110 if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1111
1112 IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1113 ibmf_i_handle_non_rmpp_err, IBMF_TNF_ERROR, "",
1114 "ibmf_i_handle_non_rmpp(): %s\n", tnf_string, msg,
1115 "mem allocation failure (non-rmpp payload)");
1116
1117 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1118 ibmf_i_handle_non_rmpp_end, IBMF_TNF_TRACE, "",
1119 "ibmf_i_handle_non_rmpp() exit\n");
1120
1121 return;
1122 }
1123 mutex_enter(&clientp->ic_kstat_mutex);
1124 IBMF_ADD32_KSTATS(clientp, recv_bufs_alloced, 1);
1125 mutex_exit(&clientp->ic_kstat_mutex);
1126 }
1127
1128 /* Get a pointer to the MAD location in the receive buffer */
1129 msgbufp = (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr;
1130
1131 /* Copy the incoming MAD into the receive buffer */
1132 bcopy((const void *)mad, (void *)msgbufp, IBMF_MAD_SIZE);
1133
1134 /* Get the offset of the class header */
1135 offset = sizeof (ib_mad_hdr_t) + clhdroff;
1136
1137 /* initialize class header pointer */
1138 if (clhdrsz == 0) {
1139 msgimplp->im_msgbufs_recv.im_bufs_cl_hdr = NULL;
1140 } else {
1141 msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
1142 (void *)(msgbufp + offset);
1143 }
1144 msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len = clhdrsz;
1145
1146 offset += clhdrsz;
1147
1148 /* initialize data area pointer */
1149 msgimplp->im_msgbufs_recv.im_bufs_cl_data = (void *)(msgbufp + offset);
1150 msgimplp->im_msgbufs_recv.im_bufs_cl_data_len = IBMF_MAD_SIZE -
1151 sizeof (ib_mad_hdr_t) - clhdroff - clhdrsz;
1152
1153 rmpp_ctx->rmpp_state = IBMF_RMPP_STATE_DONE;
1154 ibmf_i_terminate_transaction(clientp, msgimplp, IBMF_SUCCESS);
1155
1156 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_handle_non_rmpp_end,
1157 IBMF_TNF_TRACE, "", "ibmf_i_handle_non_rmpp() exit\n");
1158 }
1159
1160 /*
1161 * ibmf_i_repost_recv_buffer():
1162 * Repost a WQE to the RQ after processing it
1163 */
1164 /* ARGSUSED */
1165 int
ibmf_i_repost_recv_buffer(ibmf_ci_t * cip,ibmf_recv_wqe_t * recv_wqep)1166 ibmf_i_repost_recv_buffer(ibmf_ci_t *cip, ibmf_recv_wqe_t *recv_wqep)
1167 {
1168 int ret;
1169 ibt_status_t status;
1170 ibmf_qp_handle_t ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
1171 struct kmem_cache *kmem_cachep;
1172 ibmf_alt_qp_t *altqp;
1173 ibmf_qp_t *qpp;
1174
1175 IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1176 ibmf_i_repost_recv_buffer_start, IBMF_TNF_TRACE, "",
1177 "ibmf_i_repost_recv_buffer() enter, cip = %p, rwqep = %p\n",
1178 tnf_opaque, cip, cip, tnf_opaque, rwqep, recv_wqep);
1179
1180 ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
1181
1182 /* Get the WQE kmem cache pointer based on the QP type */
1183 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1184 kmem_cachep = cip->ci_recv_wqes_cache;
1185 qpp = recv_wqep->recv_qpp;
1186 } else {
1187 altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
1188 kmem_cachep = altqp->isq_recv_wqes_cache;
1189 }
1190
1191 /* post recv wqe; free it if the post fails */
1192 status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
1193 1, NULL);
1194
1195 ret = ibmf_i_ibt_to_ibmf_status(status);
1196 if (ret != IBMF_SUCCESS) {
1197 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1198 ibmf_i_repost_recv_buffer_err, IBMF_TNF_ERROR, "",
1199 "ibmf_i_repost_recv_buffer(): %s, status = %d\n",
1200 tnf_string, msg, "repost_recv failed", tnf_uint,
1201 ibt_status, status);
1202 kmem_free(recv_wqep->recv_wr.wr_sgl,
1203 IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t));
1204 kmem_cache_free(kmem_cachep, recv_wqep);
1205 mutex_enter(&cip->ci_mutex);
1206 IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
1207 mutex_exit(&cip->ci_mutex);
1208 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1209 mutex_enter(&cip->ci_mutex);
1210 cip->ci_wqes_alloced--;
1211 if (cip->ci_wqes_alloced == 0)
1212 cv_signal(&cip->ci_wqes_cv);
1213 mutex_exit(&cip->ci_mutex);
1214 } else {
1215 mutex_enter(&altqp->isq_mutex);
1216 altqp->isq_wqes_alloced--;
1217 if (altqp->isq_wqes_alloced == 0)
1218 cv_signal(&altqp->isq_wqes_cv);
1219 mutex_exit(&altqp->isq_mutex);
1220 }
1221 }
1222
1223 if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1224 mutex_enter(&qpp->iq_mutex);
1225 qpp->iq_rwqes_posted++;
1226 mutex_exit(&qpp->iq_mutex);
1227 } else {
1228 mutex_enter(&altqp->isq_mutex);
1229 altqp->isq_rwqes_posted++;
1230 mutex_exit(&altqp->isq_mutex);
1231 }
1232
1233 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_repost_recv_buffer_end,
1234 IBMF_TNF_TRACE, "", "ibmf_i_repost_recv_buffer() exit\n");
1235 return (ret);
1236 }
1237
1238 /*
1239 * ibmf_i_get_class:
1240 * Parses the mad header and determines which class should be notified of the
1241 * notification.
1242 *
1243 * Input Argument
1244 * madhdrp contents of mad header for the packet
1245 *
1246 * Output Argument
1247 * dest_classp pointer to the class type of the client that should be notified
1248 *
1249 * Returns
1250 * status
1251 */
1252 static int
ibmf_i_get_class(ib_mad_hdr_t * madhdrp,ibmf_qp_handle_t dest_ibmf_qp_handle,ib_lid_t slid,ibmf_client_type_t * dest_classp)1253 ibmf_i_get_class(ib_mad_hdr_t *madhdrp, ibmf_qp_handle_t dest_ibmf_qp_handle,
1254 ib_lid_t slid, ibmf_client_type_t *dest_classp)
1255 {
1256 int method = madhdrp->R_Method;
1257 int attrib = b2h16(madhdrp->AttributeID);
1258 int class = madhdrp->MgmtClass;
1259 uint32_t attrib_mod = b2h32(madhdrp->AttributeModifier);
1260
1261 IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
1262 ibmf_i_get_class_start, IBMF_TNF_TRACE, "",
1263 "ibmf_i_get_class() enter, class = 0x%x, method = 0x%x, "
1264 "attribute = 0x%x, dest_qp_hdl = 0x%p\n",
1265 tnf_opaque, class, class,
1266 tnf_opaque, method, method,
1267 tnf_opaque, attrib, attrib,
1268 tnf_opaque, ibmf_qp_handle, dest_ibmf_qp_handle);
1269
1270 /* set default for error checking */
1271 *dest_classp = 0;
1272
1273 /*
1274 * Determine the class type
1275 */
1276 switch (class) {
1277 case MAD_MGMT_CLASS_SUBN_LID_ROUTED:
1278 case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE:
1279
1280 /*
1281 * tavor generates trap by sending mad with slid 0;
1282 * deliver this to SMA
1283 */
1284 if ((method == MAD_METHOD_TRAP) && (slid == 0)) {
1285 *dest_classp = SUBN_AGENT;
1286 break;
1287 }
1288
1289 /* this is derived from table 109 of IB Spec 1.1, vol1 */
1290 if (attrib == SM_SMINFO_ATTRID || method == MAD_METHOD_TRAP ||
1291 method == MAD_METHOD_GET_RESPONSE)
1292 *dest_classp = SUBN_MANAGER;
1293 else
1294 *dest_classp = SUBN_AGENT;
1295
1296 break;
1297 case MAD_MGMT_CLASS_SUBN_ADM:
1298
1299 /*
1300 * Deliver to SA client (agent) if packet was sent to default qp
1301 * Deliver to ibmf_saa client (manager) if packet was sent to
1302 * alternate qp
1303 */
1304 if (dest_ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
1305 *dest_classp = SUBN_ADM_AGENT;
1306 else
1307 *dest_classp = SUBN_ADM_MANAGER;
1308 break;
1309 case MAD_MGMT_CLASS_PERF:
1310
1311 /* Deliver to PM if response bit is set */
1312 if ((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT)
1313 *dest_classp = PERF_MANAGER;
1314 else
1315 *dest_classp = PERF_AGENT;
1316 break;
1317 case MAD_MGMT_CLASS_BM:
1318
1319 /*
1320 * Deliver to BM if response bit is set, packet is a trap,
1321 * or packet is a BMSend
1322 */
1323 if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1324 (method == MAD_METHOD_TRAP) ||
1325 ((method == MAD_METHOD_SEND) &&
1326 ((attrib_mod & IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT) ==
1327 IBMF_BM_MAD_ATTR_MOD_RESP)))
1328 *dest_classp = BM_MANAGER;
1329 else
1330 *dest_classp = BM_AGENT;
1331
1332 break;
1333 case MAD_MGMT_CLASS_DEV_MGT:
1334
1335 /* Deliver to DM if response bit is set or packet is a trap */
1336 if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1337 (method == MAD_METHOD_TRAP))
1338 *dest_classp = DEV_MGT_MANAGER;
1339 else
1340 *dest_classp = DEV_MGT_AGENT;
1341 break;
1342 case MAD_MGMT_CLASS_COMM_MGT:
1343 *dest_classp = COMM_MGT_MANAGER_AGENT;
1344 break;
1345 case MAD_MGMT_CLASS_SNMP:
1346 *dest_classp = SNMP_MANAGER_AGENT;
1347 break;
1348 default:
1349
1350 if ((class >= MAD_MGMT_CLASS_VENDOR_START) &&
1351 (class <= MAD_MGMT_CLASS_VENDOR_END)) {
1352 *dest_classp = VENDOR_09_MANAGER_AGENT +
1353 (class - MAD_MGMT_CLASS_VENDOR_START);
1354 } else if ((class >= MAD_MGMT_CLASS_VENDOR2_START) &&
1355 (class <= MAD_MGMT_CLASS_VENDOR2_END)) {
1356 *dest_classp = VENDOR_30_MANAGER_AGENT +
1357 (class - MAD_MGMT_CLASS_VENDOR2_START);
1358 } else if ((class >= MAD_MGMT_CLASS_APPLICATION_START) &&
1359 (class <= MAD_MGMT_CLASS_APPLICATION_END)) {
1360 *dest_classp = APPLICATION_10_MANAGER_AGENT +
1361 (class - MAD_MGMT_CLASS_APPLICATION_START);
1362 }
1363
1364 break;
1365 }
1366
1367 if (*dest_classp == 0) {
1368 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1369 ibmf_i_get_class_type_err, IBMF_TNF_TRACE, "",
1370 "ibmf_i_get_class(): %s, class = 0x%x\n",
1371 tnf_string, msg, "invalid class", tnf_opaque, class, class);
1372 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_class_end,
1373 IBMF_TNF_TRACE, "", "ibmf_i_get_class() exit\n");
1374 return (IBMF_FAILURE);
1375 }
1376
1377 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1378 ibmf_i_get_class_end, IBMF_TNF_TRACE, "",
1379 "ibmf_i_get_class() exit, class = 0x%x\n",
1380 tnf_opaque, class, *dest_classp);
1381
1382 return (IBMF_SUCCESS);
1383 }
1384
1385 /*
1386 * ibmf_get_mod_name():
1387 * Constructs the module name based on the naming convention described in
1388 * PSARC case 2003/753.
1389 * The name should be "sunwibmgt<MgtClass><a_m>
1390 * where:
1391 * MgtClass = Management class field in the MAD header.
1392 * Two lower-case characters are used to represent
1393 * this 8-bit value as 2 hex digits.
1394 * a_m = "a" if the client is an agent-only module
1395 * "m" if the client is a manager-only module
1396 * "" if the client is both agent and manager.
1397 *
1398 * Input Argument
1399 * mad_class management class in the MAD header
1400 * class IBMF management class of incoming MAD
1401 *
1402 * Output Argument
1403 * modname pointer to the character array that holds the module name
1404 *
1405 * Status
1406 * None
1407 */
1408 static void
ibmf_get_mod_name(uint8_t mad_class,ibmf_client_type_t class,char * modname)1409 ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class, char *modname)
1410 {
1411 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_start,
1412 IBMF_TNF_TRACE, "", "ibmf_get_mod_name_qphdl() enter\n");
1413
1414 if (AGENT_CLASS(class)) {
1415 (void) sprintf(modname, "sunwibmgt%02xa", mad_class);
1416 } else if (MANAGER_CLASS(class)) {
1417 (void) sprintf(modname, "sunwibmgt%02xm", mad_class);
1418 } else {
1419 /* AGENT+MANAGER class */
1420 (void) sprintf(modname, "sunwibmgt%02x", mad_class);
1421 }
1422
1423 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_get_mod_name,
1424 IBMF_TNF_TRACE, "", "ibmf_get_mod_name(): name = %s\n",
1425 tnf_string, msg, modname);
1426
1427 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_end,
1428 IBMF_TNF_TRACE, "", "ibmf_get_mod_name() exit\n");
1429 }
1430
1431 /*
1432 * ibmf_send_busy():
1433 *
1434 * When a MAD request is received for an IB mandatory agent (BMA or PMA),
1435 * which has not yet registered with IBMF, IBMF returns a BUSY MAD
1436 * to the source of the request to solicit a retry while IBMF attempts
1437 * to load the mandatory agent.
1438 * A temporary, alternate QP is allocated for the purpose of sending the
1439 * MAD. This QP is configured to be in the same partition as the manager
1440 * that sent the request.
1441 *
1442 * Input Argument
1443 * modlargsp Pointer to ibmf_mod_load_args_t structure
1444 *
1445 * Output Argument
1446 * None
1447 *
1448 * Status
1449 * None
1450 */
1451 static void
ibmf_send_busy(ibmf_mod_load_args_t * modlargsp)1452 ibmf_send_busy(ibmf_mod_load_args_t *modlargsp)
1453 {
1454 ibmf_ci_t *cip = modlargsp->cip;
1455 ibmf_recv_wqe_t *recv_wqep = modlargsp->recv_wqep;
1456 ibt_wr_ds_t sgl[1];
1457 ibmf_send_wqe_t *send_wqep;
1458 ibt_send_wr_t *swrp;
1459 ibmf_msg_impl_t *msgimplp;
1460 ibmf_ud_dest_t *ibmf_ud_dest;
1461 ibt_ud_dest_t *ud_dest;
1462 ib_mad_hdr_t *smadhdrp, *rmadhdrp;
1463 ibt_adds_vect_t adds_vec;
1464 ibt_wc_t *wcp = &recv_wqep->recv_wc;
1465 ibt_status_t ibtstatus;
1466 uint_t num_work_reqs;
1467 ibt_qp_alloc_attr_t qp_attrs;
1468 ibt_qp_info_t qp_modify_attr;
1469 ibt_chan_sizes_t qp_sizes;
1470 ib_qpn_t qp_num;
1471 ibt_qp_hdl_t ibt_qp_handle;
1472 ibt_mr_hdl_t mem_hdl;
1473 ibt_mr_desc_t mem_desc;
1474 ibt_mr_attr_t mem_attr;
1475
1476 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_start,
1477 IBMF_TNF_TRACE, "", "ibmf_send_busy() enter\n");
1478
1479 /* setup the qp attrs for the alloc call */
1480 qp_attrs.qp_scq_hdl = cip->ci_alt_cq_handle;
1481 qp_attrs.qp_rcq_hdl = cip->ci_alt_cq_handle;
1482 qp_attrs.qp_pd_hdl = cip->ci_pd;
1483 qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
1484 qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1485 qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1486 qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1487 qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1488 qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1489
1490 /* request IBT for a qp with the desired attributes */
1491 ibtstatus = ibt_alloc_qp(cip->ci_ci_handle, IBT_UD_RQP,
1492 &qp_attrs, &qp_sizes, &qp_num, &ibt_qp_handle);
1493 if (ibtstatus != IBT_SUCCESS) {
1494 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1495 IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1496 tnf_string, msg, "failed to allocate alternate QP",
1497 tnf_int, ibt_status, ibtstatus);
1498 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1499 IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1500 return;
1501 }
1502
1503 qp_modify_attr.qp_trans = IBT_UD_SRV;
1504 qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1505 qp_modify_attr.qp_transport.ud.ud_qkey = IB_GSI_QKEY;
1506 qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1507 qp_modify_attr.qp_transport.ud.ud_pkey_ix = wcp->wc_pkey_ix;
1508 qp_modify_attr.qp_transport.ud.ud_port = recv_wqep->recv_port_num;
1509
1510 /* call the IB transport to initialize the QP */
1511 ibtstatus = ibt_initialize_qp(ibt_qp_handle, &qp_modify_attr);
1512 if (ibtstatus != IBT_SUCCESS) {
1513 (void) ibt_free_qp(ibt_qp_handle);
1514 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1515 IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1516 tnf_string, msg, "failed to initialize alternate QP",
1517 tnf_int, ibt_status, ibtstatus);
1518 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1519 IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1520 return;
1521 }
1522
1523 /* allocate the message context */
1524 msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1525 KM_SLEEP);
1526
1527 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
1528
1529 ibmf_i_pop_ud_dest_thread(cip);
1530
1531 /*
1532 * Get a UD dest structure from the pool, this will not fail
1533 * because ibmf_i_pop_ud_dest_thread() calls
1534 * ibmf_i_populate_ud_dest_list with the KM_SLEEP flag.
1535 */
1536 ibmf_ud_dest = ibmf_i_get_ud_dest(cip);
1537
1538 msgimplp->im_ibmf_ud_dest = ibmf_ud_dest;
1539 msgimplp->im_ud_dest = &ibmf_ud_dest->ud_dest;
1540 msgimplp->im_qp_hdl = NULL;
1541
1542 /*
1543 * Reset send_done to indicate we have not received the completion
1544 * for this send yet.
1545 */
1546 msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_SEND_DONE;
1547
1548 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
1549
1550 /*
1551 * Allocate resources needed to send a UD packet including the
1552 * send WQE context
1553 */
1554 send_wqep = (ibmf_send_wqe_t *)kmem_zalloc(sizeof (ibmf_send_wqe_t),
1555 KM_SLEEP);
1556 send_wqep->send_mem = (void *)kmem_zalloc(IBMF_MEM_PER_WQE, KM_SLEEP);
1557
1558 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1559 mem_attr.mr_len = IBMF_MEM_PER_WQE;
1560 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
1561 mem_attr.mr_as = NULL;
1562
1563 /* Register the allocated memory */
1564 ibtstatus = ibt_register_mr(cip->ci_ci_handle, cip->ci_pd, &mem_attr,
1565 &mem_hdl, &mem_desc);
1566 if (ibtstatus != IBT_SUCCESS) {
1567 kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1568 kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1569 ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1570 kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1571 (void) ibt_free_qp(ibt_qp_handle);
1572 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1573 IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1574 tnf_string, msg, "failed to register memory",
1575 tnf_int, ibt_status, ibtstatus);
1576 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1577 IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1578 return;
1579 }
1580
1581 send_wqep->send_sg_lkey = mem_desc.md_lkey;
1582 send_wqep->send_mem_hdl = mem_hdl;
1583
1584 swrp = &send_wqep->send_wr;
1585 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrp))
1586
1587 /* use send wqe pointer as the WR ID */
1588 swrp->wr_id = (ibt_wrid_t)(uintptr_t)send_wqep;
1589 ASSERT(swrp->wr_id != NULL);
1590 swrp->wr_flags = IBT_WR_NO_FLAGS;
1591 swrp->wr_opcode = IBT_WRC_SEND;
1592 swrp->wr_trans = IBT_UD_SRV;
1593
1594 send_wqep->send_client = NULL;
1595 send_wqep->send_msg = msgimplp;
1596
1597 /* Initialize the scatter-gather list */
1598 sgl[0].ds_va = (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1599 sgl[0].ds_key = send_wqep->send_sg_lkey;
1600 sgl[0].ds_len = IBMF_MAD_SIZE;
1601
1602 wcp = &recv_wqep->recv_wc;
1603
1604 /* Initialize the address vector */
1605 adds_vec.av_send_grh = B_FALSE;
1606 adds_vec.av_dlid = wcp->wc_slid;
1607 adds_vec.av_src_path = wcp->wc_path_bits;
1608 adds_vec.av_srvl = 0;
1609 adds_vec.av_srate = IBT_SRATE_1X;
1610 adds_vec.av_port_num = recv_wqep->recv_port_num;
1611
1612 ud_dest = msgimplp->im_ud_dest;
1613 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_dest))
1614 ud_dest->ud_qkey = IB_GSI_QKEY;
1615 ud_dest->ud_dst_qpn = wcp->wc_qpn;
1616
1617 /* modify the address handle with the address vector information */
1618 ibtstatus = ibt_modify_ah(cip->ci_ci_handle, ud_dest->ud_ah, &adds_vec);
1619 if (ibtstatus != IBT_SUCCESS) {
1620 (void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1621 kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1622 kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1623 ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1624 kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1625 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1626 IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1627 tnf_string, msg, "ibt modify ah failed", tnf_uint,
1628 ibt_status, ibtstatus);
1629 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1630 IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1631 return;
1632 }
1633
1634 bzero(send_wqep->send_mem, IBMF_MAD_SIZE);
1635
1636 rmadhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
1637 sizeof (ib_grh_t));
1638 smadhdrp = (ib_mad_hdr_t *)send_wqep->send_mem;
1639
1640 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rmadhdrp))
1641 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smadhdrp))
1642
1643 /* Set up the MAD header */
1644 smadhdrp->BaseVersion = rmadhdrp->BaseVersion;
1645 smadhdrp->MgmtClass = rmadhdrp->MgmtClass;
1646 smadhdrp->ClassVersion = rmadhdrp->ClassVersion;
1647 smadhdrp->R_Method = MAD_METHOD_GET_RESPONSE;
1648 smadhdrp->Status = MAD_STATUS_BUSY;
1649 smadhdrp->TransactionID = rmadhdrp->TransactionID;
1650 smadhdrp->AttributeID = rmadhdrp->AttributeID;
1651 smadhdrp->AttributeModifier = rmadhdrp->AttributeModifier;
1652
1653 swrp->wr_sgl = sgl;
1654 swrp->wr_nds = 1;
1655 swrp->wr.ud.udwr_dest = msgimplp->im_ud_dest;
1656 send_wqep->send_port_num = recv_wqep->recv_port_num;
1657 send_wqep->send_qp_handle = ibt_qp_handle;
1658 send_wqep->send_ibmf_qp_handle = NULL;
1659
1660 /* Post the MAD to the IBT layer */
1661 num_work_reqs = 1;
1662
1663 ibtstatus = ibt_post_send(ibt_qp_handle, &send_wqep->send_wr,
1664 num_work_reqs, NULL);
1665 if (ibtstatus != IBT_SUCCESS) {
1666 (void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1667 kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1668 kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1669 ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1670 kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1671 IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1672 ibmf_send_busy_err, IBMF_TNF_TRACE, "",
1673 "ibmf_send_busy(): %s, status = %d\n", tnf_string, msg,
1674 "post send failure", tnf_uint, ibt_status, ibtstatus);
1675 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1676 IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1677 return;
1678 }
1679
1680 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1681 IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1682 }
1683
1684 /*
1685 * ibmf_module_load():
1686 * This function attempts to load a client module that has not yet
1687 * registered with IBMF at the time a request MAD arrives for it.
1688 * Prior to loading the module, it sends a busy MAD to the sender of
1689 * the request MAD, this soliciting a resend of the request MAD.
1690 *
1691 * Input Argument
1692 * modlargsp Pointer to ibmf_mod_load_args_t structure
1693 *
1694 * Output Argument
1695 * None
1696 *
1697 * Status
1698 * None
1699 */
1700 static void
ibmf_module_load(void * taskq_arg)1701 ibmf_module_load(void *taskq_arg)
1702 {
1703 char *modname;
1704 ibmf_mod_load_args_t *modlargsp = (ibmf_mod_load_args_t *)taskq_arg;
1705 ibmf_ci_t *cip = modlargsp->cip;
1706 ibmf_recv_wqe_t *recv_wqep = modlargsp->recv_wqep;
1707 ibmf_client_type_t class = modlargsp->ibmf_class;
1708
1709 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_start,
1710 IBMF_TNF_TRACE, "", "ibmf_module_load_busy() enter\n");
1711 modname = modlargsp->modname;
1712
1713 if (IS_MANDATORY_CLASS(class)) {
1714 ibmf_send_busy(modlargsp);
1715 }
1716
1717 if (modload("misc", modname) < 0) {
1718 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1719 kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1720 IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1, ibmf_module_load_error,
1721 IBMF_TNF_TRACE, "",
1722 "ibmf_module_load(): modload failed for %s\n",
1723 tnf_string, module, modname);
1724 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1725 IBMF_TNF_TRACE, "", "ibmf_module_load() exit\n");
1726 return;
1727 }
1728
1729 (void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1730
1731 kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1732
1733 IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1734 IBMF_TNF_TRACE, "", "ibmf_module_load_busy() exit\n");
1735 }
1736