1 /*-
2 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include "icl_iser.h"
27
28 static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
29 static int iser_cq_poll_limit = 512;
30
31 static void
iser_cq_event_callback(struct ib_event * cause,void * context)32 iser_cq_event_callback(struct ib_event *cause, void *context)
33 {
34 ISER_ERR("got cq event %d", cause->event);
35 }
36
37 static void
iser_qp_event_callback(struct ib_event * cause,void * context)38 iser_qp_event_callback(struct ib_event *cause, void *context)
39 {
40 ISER_ERR("got qp event %d", cause->event);
41 }
42
43 static void
iser_event_handler(struct ib_event_handler * handler,struct ib_event * event)44 iser_event_handler(struct ib_event_handler *handler,
45 struct ib_event *event)
46 {
47 ISER_ERR("async event %d on device %s port %d",
48 event->event, event->device->name,
49 event->element.port_num);
50 }
51
52 /**
53 * is_iser_tx_desc - Indicate if the completion wr_id
54 * is a TX descriptor or not.
55 * @iser_conn: iser connection
56 * @wr_id: completion WR identifier
57 *
58 * Since we cannot rely on wc opcode in FLUSH errors
59 * we must work around it by checking if the wr_id address
60 * falls in the iser connection rx_descs buffer. If so
61 * it is an RX descriptor, otherwize it is a TX.
62 */
63 static inline bool
is_iser_tx_desc(struct iser_conn * iser_conn,void * wr_id)64 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
65 {
66 void *start = iser_conn->rx_descs;
67 u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
68 void *end = (void *)((uintptr_t)start + (uintptr_t)len);
69
70 if (start) {
71 if (wr_id >= start && wr_id < end)
72 return false;
73 } else {
74 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
75 }
76
77 return true;
78 }
79
80 /**
81 * iser_handle_comp_error() - Handle error completion
82 * @ib_conn: connection RDMA resources
83 * @wc: work completion
84 *
85 * Notes: Update post_recv_buf_count in case of recv error completion.
86 * For non-FLUSH error completion we should also notify iscsi layer that
87 * connection is failed (in case we passed bind stage).
88 */
89 static void
iser_handle_comp_error(struct ib_conn * ib_conn,struct ib_wc * wc)90 iser_handle_comp_error(struct ib_conn *ib_conn,
91 struct ib_wc *wc)
92 {
93 void *wr_id = (void *)(uintptr_t)wc->wr_id;
94 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
95 ib_conn);
96
97 if (is_iser_tx_desc(iser_conn, wr_id)) {
98 ISER_DBG("conn %p got send comp error", iser_conn);
99 } else {
100 ISER_DBG("conn %p got recv comp error", iser_conn);
101 ib_conn->post_recv_buf_count--;
102 }
103 if (wc->status != IB_WC_WR_FLUSH_ERR)
104 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
105 }
106
107 /**
108 * iser_handle_wc - handle a single work completion
109 * @wc: work completion
110 *
111 * Soft-IRQ context, work completion can be either
112 * SEND or RECV, and can turn out successful or
113 * with error (or flush error).
114 */
iser_handle_wc(struct ib_wc * wc)115 static void iser_handle_wc(struct ib_wc *wc)
116 {
117 struct ib_conn *ib_conn;
118 struct iser_tx_desc *tx_desc;
119 struct iser_rx_desc *rx_desc;
120
121 ib_conn = wc->qp->qp_context;
122 if (likely(wc->status == IB_WC_SUCCESS)) {
123 if (wc->opcode == IB_WC_RECV) {
124 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
125 iser_rcv_completion(rx_desc, wc->byte_len,
126 ib_conn);
127 } else
128 if (wc->opcode == IB_WC_SEND) {
129 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
130 iser_snd_completion(tx_desc, ib_conn);
131 } else {
132 ISER_ERR("Unknown wc opcode %d", wc->opcode);
133 }
134 } else {
135 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
136 ib_conn);
137 if (wc->status != IB_WC_WR_FLUSH_ERR) {
138 ISER_ERR("conn %p wr id %llx status %d vend_err %x",
139 iser_conn, (unsigned long long)wc->wr_id,
140 wc->status, wc->vendor_err);
141 } else {
142 ISER_DBG("flush error: conn %p wr id %llx",
143 iser_conn, (unsigned long long)wc->wr_id);
144 }
145
146 if (wc->wr_id == ISER_BEACON_WRID) {
147 /* all flush errors were consumed */
148 mtx_lock(&ib_conn->beacon.flush_lock);
149 ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
150 cv_signal(&ib_conn->beacon.flush_cv);
151 mtx_unlock(&ib_conn->beacon.flush_lock);
152 } else {
153 iser_handle_comp_error(ib_conn, wc);
154 }
155 }
156 }
157
158 static void
iser_cq_tasklet_fn(void * data,int pending)159 iser_cq_tasklet_fn(void *data, int pending)
160 {
161 struct iser_comp *comp = (struct iser_comp *)data;
162 struct ib_cq *cq = comp->cq;
163 struct ib_wc *const wcs = comp->wcs;
164 int completed = 0;
165 int i;
166 int n;
167
168 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
169 for (i = 0; i < n; i++)
170 iser_handle_wc(&wcs[i]);
171
172 completed += n;
173 if (completed >= iser_cq_poll_limit)
174 break;
175 }
176
177 /*
178 * It is assumed here that arming CQ only once its empty
179 * would not cause interrupts to be missed.
180 */
181 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
182 }
183
184 static void
iser_cq_callback(struct ib_cq * cq,void * cq_context)185 iser_cq_callback(struct ib_cq *cq, void *cq_context)
186 {
187 struct iser_comp *comp = cq_context;
188
189 taskqueue_enqueue(comp->tq, &comp->task);
190 }
191
192 /**
193 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
194 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
195 * the adapator.
196 *
197 * returns 0 on success, -1 on failure
198 */
199 static int
iser_create_device_ib_res(struct iser_device * device)200 iser_create_device_ib_res(struct iser_device *device)
201 {
202 struct ib_device *ib_dev = device->ib_device;
203 int i, max_cqe;
204
205 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
206 ISER_ERR("device %s doesn't support Fastreg, "
207 "can't register memory", device->ib_device->name);
208 return (1);
209 }
210
211 device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);
212
213 device->comps = malloc(device->comps_used * sizeof(*device->comps),
214 M_ISER_VERBS, M_WAITOK | M_ZERO);
215
216 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
217
218 ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
219 device->comps_used, device->ib_device->name,
220 device->ib_device->num_comp_vectors, max_cqe);
221
222 device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY);
223 if (IS_ERR(device->pd))
224 goto pd_err;
225
226 for (i = 0; i < device->comps_used; i++) {
227 struct iser_comp *comp = &device->comps[i];
228 struct ib_cq_init_attr cq_attr = {
229 .cqe = max_cqe,
230 .comp_vector = i,
231 };
232
233 comp->device = device;
234 comp->cq = ib_create_cq(device->ib_device,
235 iser_cq_callback,
236 iser_cq_event_callback,
237 (void *)comp,
238 &cq_attr);
239 if (IS_ERR(comp->cq)) {
240 comp->cq = NULL;
241 goto cq_err;
242 }
243
244 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
245 goto cq_err;
246
247 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
248 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
249 taskqueue_thread_enqueue, &comp->tq);
250 if (!comp->tq)
251 goto tq_err;
252 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
253 }
254
255 device->mr = device->pd->__internal_mr;
256 if (IS_ERR(device->mr))
257 goto tq_err;
258
259 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
260 iser_event_handler);
261 ib_register_event_handler(&device->event_handler);
262 return (0);
263
264 tq_err:
265 for (i = 0; i < device->comps_used; i++) {
266 struct iser_comp *comp = &device->comps[i];
267 if (comp->tq)
268 taskqueue_free(comp->tq);
269 }
270 cq_err:
271 for (i = 0; i < device->comps_used; i++) {
272 struct iser_comp *comp = &device->comps[i];
273 if (comp->cq)
274 ib_destroy_cq(comp->cq);
275 }
276 ib_dealloc_pd(device->pd);
277 pd_err:
278 free(device->comps, M_ISER_VERBS);
279 ISER_ERR("failed to allocate an IB resource");
280 return (1);
281 }
282
283 /**
284 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
285 * CQ and PD created with the device associated with the adapator.
286 */
287 static void
iser_free_device_ib_res(struct iser_device * device)288 iser_free_device_ib_res(struct iser_device *device)
289 {
290 int i;
291
292 for (i = 0; i < device->comps_used; i++) {
293 struct iser_comp *comp = &device->comps[i];
294
295 taskqueue_free(comp->tq);
296 ib_destroy_cq(comp->cq);
297 comp->cq = NULL;
298 }
299
300 ib_unregister_event_handler(&device->event_handler);
301 ib_dealloc_pd(device->pd);
302
303 free(device->comps, M_ISER_VERBS);
304 device->comps = NULL;
305
306 device->mr = NULL;
307 device->pd = NULL;
308 }
309
310 static int
iser_alloc_reg_res(struct ib_device * ib_device,struct ib_pd * pd,struct iser_reg_resources * res)311 iser_alloc_reg_res(struct ib_device *ib_device,
312 struct ib_pd *pd,
313 struct iser_reg_resources *res)
314 {
315 int ret;
316
317 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1);
318 if (IS_ERR(res->mr)) {
319 ret = -PTR_ERR(res->mr);
320 ISER_ERR("Failed to allocate fast reg mr err=%d", ret);
321 return (ret);
322 }
323 res->mr_valid = 1;
324
325 return (0);
326 }
327
328 static void
iser_free_reg_res(struct iser_reg_resources * rsc)329 iser_free_reg_res(struct iser_reg_resources *rsc)
330 {
331 ib_dereg_mr(rsc->mr);
332 }
333
334 static struct fast_reg_descriptor *
iser_create_fastreg_desc(struct ib_device * ib_device,struct ib_pd * pd)335 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
336 {
337 struct fast_reg_descriptor *desc;
338 int ret;
339
340 desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
341 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
342 if (ret) {
343 ISER_ERR("failed to allocate reg_resources");
344 goto err;
345 }
346
347 return (desc);
348 err:
349 free(desc, M_ISER_VERBS);
350 return (NULL);
351 }
352
353 /**
354 * iser_create_fmr_pool - Creates FMR pool and page_vector
355 *
356 * returns 0 on success, or errno code on failure
357 */
358 int
iser_create_fastreg_pool(struct ib_conn * ib_conn,unsigned cmds_max)359 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
360 {
361 struct iser_device *device = ib_conn->device;
362 struct fast_reg_descriptor *desc;
363 int i;
364
365 INIT_LIST_HEAD(&ib_conn->fastreg.pool);
366 ib_conn->fastreg.pool_size = 0;
367 for (i = 0; i < cmds_max; i++) {
368 desc = iser_create_fastreg_desc(device->ib_device, device->pd);
369 if (!desc) {
370 ISER_ERR("Failed to create fastreg descriptor");
371 goto err;
372 }
373
374 list_add_tail(&desc->list, &ib_conn->fastreg.pool);
375 ib_conn->fastreg.pool_size++;
376 }
377
378 return (0);
379
380 err:
381 iser_free_fastreg_pool(ib_conn);
382 return (ENOMEM);
383 }
384
385 /**
386 * iser_free_fmr_pool - releases the FMR pool and page vec
387 */
388 void
iser_free_fastreg_pool(struct ib_conn * ib_conn)389 iser_free_fastreg_pool(struct ib_conn *ib_conn)
390 {
391 struct fast_reg_descriptor *desc, *tmp;
392 int i = 0;
393
394 if (list_empty(&ib_conn->fastreg.pool))
395 return;
396
397 ISER_DBG("freeing conn %p fr pool", ib_conn);
398
399 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
400 list_del(&desc->list);
401 iser_free_reg_res(&desc->rsc);
402 free(desc, M_ISER_VERBS);
403 ++i;
404 }
405
406 if (i < ib_conn->fastreg.pool_size)
407 ISER_WARN("pool still has %d regions registered",
408 ib_conn->fastreg.pool_size - i);
409 }
410
411 /**
412 * iser_create_ib_conn_res - Queue-Pair (QP)
413 *
414 * returns 0 on success, 1 on failure
415 */
416 static int
iser_create_ib_conn_res(struct ib_conn * ib_conn)417 iser_create_ib_conn_res(struct ib_conn *ib_conn)
418 {
419 struct iser_conn *iser_conn;
420 struct iser_device *device;
421 struct ib_device_attr *dev_attr;
422 struct ib_qp_init_attr init_attr;
423 int index, min_index = 0;
424 int ret = -ENOMEM;
425
426 iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
427 device = ib_conn->device;
428 dev_attr = &device->dev_attr;
429
430 mtx_lock(&ig.connlist_mutex);
431 /* select the CQ with the minimal number of usages */
432 for (index = 0; index < device->comps_used; index++) {
433 if (device->comps[index].active_qps <
434 device->comps[min_index].active_qps)
435 min_index = index;
436 }
437 ib_conn->comp = &device->comps[min_index];
438 ib_conn->comp->active_qps++;
439 mtx_unlock(&ig.connlist_mutex);
440 ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);
441
442 memset(&init_attr, 0, sizeof init_attr);
443 init_attr.event_handler = iser_qp_event_callback;
444 init_attr.qp_context = (void *)ib_conn;
445 init_attr.send_cq = ib_conn->comp->cq;
446 init_attr.recv_cq = ib_conn->comp->cq;
447 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
448 init_attr.cap.max_send_sge = 2;
449 init_attr.cap.max_recv_sge = 1;
450 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
451 init_attr.qp_type = IB_QPT_RC;
452
453 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
454 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
455 iser_conn->max_cmds =
456 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
457 } else {
458 init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
459 iser_conn->max_cmds =
460 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
461 }
462 ISER_DBG("device %s supports max_send_wr %d",
463 device->ib_device->name, dev_attr->max_qp_wr);
464
465 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
466 if (ret)
467 goto out_err;
468
469 ib_conn->qp = ib_conn->cma_id->qp;
470 ISER_DBG("setting conn %p cma_id %p qp %p",
471 ib_conn, ib_conn->cma_id,
472 ib_conn->cma_id->qp);
473
474 return (ret);
475
476 out_err:
477 mtx_lock(&ig.connlist_mutex);
478 ib_conn->comp->active_qps--;
479 mtx_unlock(&ig.connlist_mutex);
480 ISER_ERR("unable to alloc mem or create resource, err %d", ret);
481
482 return (ret);
483 }
484
485 /**
486 * based on the resolved device node GUID see if there already allocated
487 * device for this device. If there's no such, create one.
488 */
489 static struct iser_device *
iser_device_find_by_ib_device(struct rdma_cm_id * cma_id)490 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
491 {
492 struct iser_device *device;
493
494 sx_xlock(&ig.device_list_mutex);
495
496 list_for_each_entry(device, &ig.device_list, ig_list)
497 /* find if there's a match using the node GUID */
498 if (device->ib_device->node_guid == cma_id->device->node_guid)
499 goto inc_refcnt;
500
501 device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO);
502 /* assign this device to the device */
503 device->ib_device = cma_id->device;
504 /* init the device and link it into ig device list */
505 if (iser_create_device_ib_res(device)) {
506 free(device, M_ISER_VERBS);
507 device = NULL;
508 goto out;
509 }
510 list_add(&device->ig_list, &ig.device_list);
511
512 inc_refcnt:
513 device->refcount++;
514 ISER_INFO("device %p refcount %d", device, device->refcount);
515 out:
516 sx_xunlock(&ig.device_list_mutex);
517 return (device);
518 }
519
520 /* if there's no demand for this device, release it */
521 static void
iser_device_try_release(struct iser_device * device)522 iser_device_try_release(struct iser_device *device)
523 {
524 sx_xlock(&ig.device_list_mutex);
525 device->refcount--;
526 ISER_INFO("device %p refcount %d", device, device->refcount);
527 if (!device->refcount) {
528 iser_free_device_ib_res(device);
529 list_del(&device->ig_list);
530 free(device, M_ISER_VERBS);
531 device = NULL;
532 }
533 sx_xunlock(&ig.device_list_mutex);
534 }
535
536 /**
537 * Called with state mutex held
538 **/
iser_conn_state_comp_exch(struct iser_conn * iser_conn,enum iser_conn_state comp,enum iser_conn_state exch)539 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
540 enum iser_conn_state comp,
541 enum iser_conn_state exch)
542 {
543 int ret;
544
545 ret = (iser_conn->state == comp);
546 if (ret)
547 iser_conn->state = exch;
548
549 return ret;
550 }
551
552 /**
553 * iser_free_ib_conn_res - release IB related resources
554 * @iser_conn: iser connection struct
555 * @destroy: indicator if we need to try to release the
556 * iser device and memory regoins pool (only iscsi
557 * shutdown and DEVICE_REMOVAL will use this).
558 *
559 * This routine is called with the iser state mutex held
560 * so the cm_id removal is out of here. It is Safe to
561 * be invoked multiple times.
562 */
563 void
iser_free_ib_conn_res(struct iser_conn * iser_conn,bool destroy)564 iser_free_ib_conn_res(struct iser_conn *iser_conn,
565 bool destroy)
566 {
567 struct ib_conn *ib_conn = &iser_conn->ib_conn;
568 struct iser_device *device = ib_conn->device;
569
570 ISER_INFO("freeing conn %p cma_id %p qp %p",
571 iser_conn, ib_conn->cma_id, ib_conn->qp);
572
573 if (ib_conn->qp != NULL) {
574 mtx_lock(&ig.connlist_mutex);
575 ib_conn->comp->active_qps--;
576 mtx_unlock(&ig.connlist_mutex);
577 rdma_destroy_qp(ib_conn->cma_id);
578 ib_conn->qp = NULL;
579 }
580
581 if (destroy) {
582 if (iser_conn->login_buf)
583 iser_free_login_buf(iser_conn);
584
585 if (iser_conn->rx_descs)
586 iser_free_rx_descriptors(iser_conn);
587
588 if (device != NULL) {
589 iser_device_try_release(device);
590 ib_conn->device = NULL;
591 }
592 }
593 }
594
595 /**
596 * triggers start of the disconnect procedures and wait for them to be done
597 * Called with state mutex held
598 */
599 int
iser_conn_terminate(struct iser_conn * iser_conn)600 iser_conn_terminate(struct iser_conn *iser_conn)
601 {
602 struct ib_conn *ib_conn = &iser_conn->ib_conn;
603 const struct ib_send_wr *bad_send_wr;
604 const struct ib_recv_wr *bad_recv_wr;
605 int err = 0;
606
607 /* terminate the iser conn only if the conn state is UP */
608 if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
609 ISER_CONN_TERMINATING))
610 return (0);
611
612 ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);
613
614 if (ib_conn->qp == NULL) {
615 /* HOW can this be??? */
616 ISER_WARN("qp wasn't created");
617 return (1);
618 }
619
620 /*
621 * Todo: This is a temporary workaround.
622 * We serialize the connection closure using global lock in order to
623 * receive all posted beacons completions.
624 * Without Serialization, in case we open many connections (QPs) on
625 * the same CQ, we might miss beacons because of missing interrupts.
626 */
627 sx_xlock(&ig.close_conns_mutex);
628
629 /*
630 * In case we didn't already clean up the cma_id (peer initiated
631 * a disconnection), we need to Cause the CMA to change the QP
632 * state to ERROR.
633 */
634 if (ib_conn->cma_id) {
635 err = rdma_disconnect(ib_conn->cma_id);
636 if (err)
637 ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
638 iser_conn, err);
639
640 mtx_lock(&ib_conn->beacon.flush_lock);
641 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
642 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
643 ib_conn->beacon.send.opcode = IB_WR_SEND;
644 /* post an indication that all send flush errors were consumed */
645 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
646 if (err) {
647 ISER_ERR("conn %p failed to post send_beacon", ib_conn);
648 mtx_unlock(&ib_conn->beacon.flush_lock);
649 goto out;
650 }
651
652 ISER_DBG("before send cv_wait: %p", iser_conn);
653 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
654 ISER_DBG("after send cv_wait: %p", iser_conn);
655
656 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
657 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
658 /* post an indication that all recv flush errors were consumed */
659 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
660 if (err) {
661 ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
662 mtx_unlock(&ib_conn->beacon.flush_lock);
663 goto out;
664 }
665
666 ISER_DBG("before recv cv_wait: %p", iser_conn);
667 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
668 mtx_unlock(&ib_conn->beacon.flush_lock);
669 ISER_DBG("after recv cv_wait: %p", iser_conn);
670 }
671 out:
672 sx_xunlock(&ig.close_conns_mutex);
673 return (1);
674 }
675
676 /**
677 * Called with state mutex held
678 **/
679 static void
iser_connect_error(struct rdma_cm_id * cma_id)680 iser_connect_error(struct rdma_cm_id *cma_id)
681 {
682 struct iser_conn *iser_conn;
683
684 iser_conn = cma_id->context;
685
686 ISER_ERR("conn %p", iser_conn);
687
688 iser_conn->state = ISER_CONN_TERMINATING;
689
690 cv_signal(&iser_conn->up_cv);
691 }
692
693 /**
694 * Called with state mutex held
695 **/
696 static void
iser_addr_handler(struct rdma_cm_id * cma_id)697 iser_addr_handler(struct rdma_cm_id *cma_id)
698 {
699 struct iser_device *device;
700 struct iser_conn *iser_conn;
701 struct ib_conn *ib_conn;
702 int ret;
703
704 iser_conn = cma_id->context;
705
706 ib_conn = &iser_conn->ib_conn;
707 device = iser_device_find_by_ib_device(cma_id);
708 if (!device) {
709 ISER_ERR("conn %p device lookup/creation failed",
710 iser_conn);
711 iser_connect_error(cma_id);
712 return;
713 }
714
715 ib_conn->device = device;
716
717 ret = rdma_resolve_route(cma_id, 1000);
718 if (ret) {
719 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
720 iser_connect_error(cma_id);
721 return;
722 }
723 }
724
725 /**
726 * Called with state mutex held
727 **/
728 static void
iser_route_handler(struct rdma_cm_id * cma_id)729 iser_route_handler(struct rdma_cm_id *cma_id)
730 {
731 struct rdma_conn_param conn_param;
732 int ret;
733 struct iser_cm_hdr req_hdr;
734 struct iser_conn *iser_conn = cma_id->context;
735 struct ib_conn *ib_conn = &iser_conn->ib_conn;
736 struct iser_device *device = ib_conn->device;
737
738 ret = iser_create_ib_conn_res(ib_conn);
739 if (ret)
740 goto failure;
741
742 memset(&conn_param, 0, sizeof conn_param);
743 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
744 conn_param.retry_count = 7;
745 conn_param.rnr_retry_count = 6;
746 /*
747 * Initiaotr depth should not be set, but in order to compat
748 * with old targets, we keep this value set.
749 */
750 conn_param.initiator_depth = 1;
751
752 memset(&req_hdr, 0, sizeof(req_hdr));
753 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
754 ISER_SEND_W_INV_NOT_SUPPORTED);
755 conn_param.private_data = (void *)&req_hdr;
756 conn_param.private_data_len = sizeof(struct iser_cm_hdr);
757
758 ret = rdma_connect(cma_id, &conn_param);
759 if (ret) {
760 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
761 goto failure;
762 }
763
764 return;
765 failure:
766 iser_connect_error(cma_id);
767 }
768
769 /**
770 * Called with state mutex held
771 **/
772 static void
iser_connected_handler(struct rdma_cm_id * cma_id)773 iser_connected_handler(struct rdma_cm_id *cma_id)
774 {
775 struct iser_conn *iser_conn;
776 struct ib_qp_attr attr;
777 struct ib_qp_init_attr init_attr;
778
779 iser_conn = cma_id->context;
780
781 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
782
783 ISER_INFO("remote qpn:%x my qpn:%x",
784 attr.dest_qp_num, cma_id->qp->qp_num);
785
786 iser_conn->state = ISER_CONN_UP;
787
788 cv_signal(&iser_conn->up_cv);
789 }
790
791 /**
792 * Called with state mutex held
793 **/
794 static void
iser_cleanup_handler(struct rdma_cm_id * cma_id,bool destroy)795 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
796 {
797 struct iser_conn *iser_conn = cma_id->context;
798
799 if (iser_conn_terminate(iser_conn))
800 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
801
802 }
803
804 int
iser_cma_handler(struct rdma_cm_id * cma_id,struct rdma_cm_event * event)805 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
806 {
807 struct iser_conn *iser_conn;
808 int ret = 0;
809
810 iser_conn = cma_id->context;
811 ISER_INFO("event %d status %d conn %p id %p",
812 event->event, event->status, cma_id->context, cma_id);
813
814 sx_xlock(&iser_conn->state_mutex);
815 switch (event->event) {
816 case RDMA_CM_EVENT_ADDR_RESOLVED:
817 iser_addr_handler(cma_id);
818 break;
819 case RDMA_CM_EVENT_ROUTE_RESOLVED:
820 iser_route_handler(cma_id);
821 break;
822 case RDMA_CM_EVENT_ESTABLISHED:
823 iser_connected_handler(cma_id);
824 break;
825 case RDMA_CM_EVENT_ADDR_ERROR:
826 case RDMA_CM_EVENT_ROUTE_ERROR:
827 case RDMA_CM_EVENT_CONNECT_ERROR:
828 case RDMA_CM_EVENT_UNREACHABLE:
829 case RDMA_CM_EVENT_REJECTED:
830 iser_connect_error(cma_id);
831 break;
832 case RDMA_CM_EVENT_DISCONNECTED:
833 case RDMA_CM_EVENT_ADDR_CHANGE:
834 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
835 iser_cleanup_handler(cma_id, false);
836 break;
837 default:
838 ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
839 break;
840 }
841 sx_xunlock(&iser_conn->state_mutex);
842
843 return (ret);
844 }
845
846 int
iser_post_recvl(struct iser_conn * iser_conn)847 iser_post_recvl(struct iser_conn *iser_conn)
848 {
849 const struct ib_recv_wr *rx_wr_failed;
850 struct ib_recv_wr rx_wr;
851 struct ib_conn *ib_conn = &iser_conn->ib_conn;
852 struct ib_sge sge;
853 int ib_ret;
854
855 sge.addr = iser_conn->login_resp_dma;
856 sge.length = ISER_RX_LOGIN_SIZE;
857 sge.lkey = ib_conn->device->mr->lkey;
858
859 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
860 rx_wr.sg_list = &sge;
861 rx_wr.num_sge = 1;
862 rx_wr.next = NULL;
863
864 ib_conn->post_recv_buf_count++;
865 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
866 if (ib_ret) {
867 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
868 ib_conn->post_recv_buf_count--;
869 }
870
871 return (ib_ret);
872 }
873
874 int
iser_post_recvm(struct iser_conn * iser_conn,int count)875 iser_post_recvm(struct iser_conn *iser_conn, int count)
876 {
877 const struct ib_recv_wr *rx_wr_failed;
878 struct ib_recv_wr *rx_wr;
879 int i, ib_ret;
880 struct ib_conn *ib_conn = &iser_conn->ib_conn;
881 unsigned int my_rx_head = iser_conn->rx_desc_head;
882 struct iser_rx_desc *rx_desc;
883
884 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
885 rx_desc = &iser_conn->rx_descs[my_rx_head];
886 rx_wr->wr_id = (uintptr_t)rx_desc;
887 rx_wr->sg_list = &rx_desc->rx_sg;
888 rx_wr->num_sge = 1;
889 rx_wr->next = rx_wr + 1;
890 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
891 }
892
893 rx_wr--;
894 rx_wr->next = NULL; /* mark end of work requests list */
895
896 ib_conn->post_recv_buf_count += count;
897 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
898 if (ib_ret) {
899 ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
900 ib_conn->post_recv_buf_count -= count;
901 } else
902 iser_conn->rx_desc_head = my_rx_head;
903
904 return (ib_ret);
905 }
906
907 /**
908 * iser_start_send - Initiate a Send DTO operation
909 *
910 * returns 0 on success, -1 on failure
911 */
iser_post_send(struct ib_conn * ib_conn,struct iser_tx_desc * tx_desc,bool signal)912 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
913 bool signal)
914 {
915 int ib_ret;
916 const struct ib_send_wr *send_wr_failed;
917 struct ib_send_wr send_wr;
918
919 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
920 tx_desc->dma_addr, ISER_HEADERS_LEN,
921 DMA_TO_DEVICE);
922
923 send_wr.next = NULL;
924 send_wr.wr_id = (uintptr_t)tx_desc;
925 send_wr.sg_list = tx_desc->tx_sg;
926 send_wr.num_sge = tx_desc->num_sge;
927 send_wr.opcode = IB_WR_SEND;
928 send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
929
930 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
931 if (ib_ret)
932 ISER_ERR("ib_post_send failed, ret:%d", ib_ret);
933
934 return (ib_ret);
935 }
936