xref: /freebsd/sys/dev/iser/icl_iser.c (revision c6a43f7f10b183469c3804cfb31b29b6d938621b)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "icl_iser.h"
28 
/* sysctl node kern.iser with a tunable knob enabling debug messages. */
SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

/* Malloc type for connection objects; UMA zone for iser PDUs. */
static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

/* Count of live iser connections; module unload is refused while nonzero. */
static volatile u_int	icl_iser_ncons;
/* Module-global state: device list, connection list, and their locks. */
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);
42 
/*
 * ICL backend methods implemented by this file.  They are wired into
 * the icl kobj class below so the generic iSCSI code dispatches to the
 * iser implementations.
 */
static icl_conn_new_pdu_t	iser_conn_new_pdu;
static icl_conn_pdu_free_t	iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t	iser_conn_pdu_queue;
static icl_conn_handoff_t	iser_conn_handoff;
static icl_conn_free_t		iser_conn_free;
static icl_conn_close_t		iser_conn_close;
static icl_conn_connect_t	iser_conn_connect;
static icl_conn_task_setup_t	iser_conn_task_setup;
static icl_conn_task_done_t	iser_conn_task_done;
static icl_conn_pdu_get_data_t	iser_conn_pdu_get_data;

/* Method table mapping the generic icl_conn_* methods to iser_conn_*. */
static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

/* kobj class "icl_iser"; instances are struct iser_conn. */
DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
73 
74 /**
75  * iser_initialize_headers() - Initialize task headers
76  * @pdu:       iser pdu
77  * @iser_conn:    iser connection
78  *
79  * Notes:
80  * This routine may race with iser teardown flow for scsi
81  * error handling TMFs. So for TMF we should acquire the
82  * state mutex to avoid dereferencing the IB device which
83  * may have already been terminated (racing teardown sequence).
84  */
85 int
86 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
87 {
88 	struct iser_tx_desc *tx_desc = &pdu->desc;
89 	struct iser_device *device = iser_conn->ib_conn.device;
90 	u64 dma_addr;
91 	int ret = 0;
92 
93 	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
94 				ISER_HEADERS_LEN, DMA_TO_DEVICE);
95 	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
96 		ret = -ENOMEM;
97 		goto out;
98 	}
99 
100 	tx_desc->mapped = true;
101 	tx_desc->dma_addr = dma_addr;
102 	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
103 	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
104 	tx_desc->tx_sg[0].lkey   = device->mr->lkey;
105 
106 out:
107 
108 	return (ret);
109 }
110 
/*
 * icl "append data" method.  For iser, only login and text requests
 * carry their payload this way: it is copied into the connection's
 * pre-allocated login request buffer.  All other opcodes fall through
 * as a no-op (their data moves over RDMA, not inside the PDU).
 *
 * NOTE(review): len is not bounds-checked against the login buffer —
 * presumably it is limited by the negotiated login/text PDU size;
 * confirm against the icl caller.
 */
int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
			  const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
		break;
	}

	return (0);
}
128 
/*
 * icl "get data" method: copy len bytes of received PDU data, starting
 * at offset off, into the upper layer's buffer.
 *
 * NOTE(review): ip_data_mbuf is used here as if it held a plain byte
 * buffer rather than a struct mbuf chain; "+ off" on a true
 * struct mbuf * would scale by sizeof(struct mbuf).  Presumably the
 * iser receive path stores a raw buffer pointer in this field
 * (ic_unmapped connection) — confirm against the receive completion
 * code.
 */
void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
		       size_t off, void *addr, size_t len)
{
	/* If we have a receive data, copy it to upper layer buffer */
	if (ip->ip_data_mbuf)
		memcpy(addr, ip->ip_data_mbuf + off, len);
}
137 
138 /*
139  * Allocate icl_pdu with empty BHS to fill up by the caller.
140  */
141 struct icl_pdu *
142 iser_new_pdu(struct icl_conn *ic, int flags)
143 {
144 	struct icl_iser_pdu *iser_pdu;
145 	struct icl_pdu *ip;
146 	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
147 
148 	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
149 	if (iser_pdu == NULL) {
150 		ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
151 		return (NULL);
152 	}
153 
154 	iser_pdu->iser_conn = iser_conn;
155 	ip = &iser_pdu->icl_pdu;
156 	ip->ip_conn = ic;
157 	ip->ip_bhs = &iser_pdu->desc.iscsi_header;
158 
159 	return (ip);
160 }
161 
/* icl "new pdu" method: thin wrapper around iser_new_pdu(). */
struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}
167 
168 void
169 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
170 {
171 	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
172 
173 	uma_zfree(icl_pdu_zone, iser_pdu);
174 }
175 
176 size_t
177 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
178 				  const struct icl_pdu *request)
179 {
180 	uint32_t len = 0;
181 
182 	len += request->ip_bhs->bhs_data_segment_len[0];
183 	len <<= 8;
184 	len += request->ip_bhs->bhs_data_segment_len[1];
185 	len <<= 8;
186 	len += request->ip_bhs->bhs_data_segment_len[2];
187 
188 	return (len);
189 }
190 
/* icl "pdu free" method: thin wrapper around iser_pdu_free(). */
void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}
196 
197 static bool
198 is_control_opcode(uint8_t opcode)
199 {
200 	bool is_control = false;
201 
202 	switch (opcode & ISCSI_OPCODE_MASK) {
203 		case ISCSI_BHS_OPCODE_NOP_OUT:
204 		case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
205 		case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
206 		case ISCSI_BHS_OPCODE_TEXT_REQUEST:
207 			is_control = true;
208 			break;
209 		case ISCSI_BHS_OPCODE_SCSI_COMMAND:
210 			is_control = false;
211 			break;
212 		default:
213 			ISER_ERR("unknown opcode %d", opcode);
214 	}
215 
216 	return (is_control);
217 }
218 
/*
 * icl "pdu queue" method: map the PDU's TX descriptor and post it on
 * the send queue, via the control path for control-type opcodes and
 * the command path (RDMA setup) for SCSI commands.
 *
 * NOTE(review): on the early returns (connection not UP, or header
 * mapping failure) the pdu is neither sent nor freed here — confirm
 * that the caller's teardown path reclaims it.
 */
void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	/* Silently drop PDUs once teardown has started. */
	if (iser_conn->state != ISER_CONN_UP)
		return;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}
245 
/*
 * Connection factory registered with icl_register().  Allocates an
 * iser_conn as a kobj of class icl_iser, initializes its locks and
 * condition variables, and fills in the generic icl_conn fields.
 * Bumps the module-wide connection count (dropped in iser_conn_free).
 */
static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	/* M_WAITOK allocation; the NULL check below is defensive. */
	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock", NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_offload = strdup("iser", M_TEMP);
	/* Mark the connection as iser so icl routes data via RDMA. */
	ic->ic_iser = true;
	ic->ic_unmapped = true;

	return (ic);
}
276 
/*
 * icl "free" method: release all connection resources, destroy the
 * locks/condvars in reverse order of their creation in iser_new_conn,
 * delete the kobj, and drop the module-wide connection count.
 */
void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_conn_release(ic);
	mtx_destroy(&iser_conn->ib_conn.lock);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}
291 
/*
 * icl "handoff" method: finish bringing the connection into service
 * after login.  Allocates the receive descriptors sized by the
 * caller's tag budget and posts the initial batch of receive buffers.
 * The fd argument is unused for iser (no socket to take over).
 *
 * Returns 0 on success or an errno; on receive-post failure the RX
 * descriptors are freed again.
 */
int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	/* state_mutex serializes against the teardown flow. */
	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started\n",
			 iser_conn, iser_conn->state);
		goto out;
	}

	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	iser_conn->handoff_done = true;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);

}
326 
/**
 * Release a connection's iser resources: unlink it from the global
 * connection list (if present), free its IB resources, and destroy the
 * RDMA CM id.  Safe to call on a connection that was never fully
 * established, and safe to call more than once.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for iser connection in global list.
	 * It may not be there in case of failure in connection establishment
	 * stage.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we reconnecting or removing session, we need to
	 * release IB resources (which is safe to call more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	/* Clear cma_id so a repeated release is a no-op here. */
	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}

}
365 
/*
 * icl "close" method: start terminating the connection.  Actual
 * resource release happens later in iser_conn_free()/release().
 */
void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case iser connection is waiting on conditional variable
	 * (state PENDING) and we try to close it before connection establishment,
	 * we need to signal it to continue releasing connection properly.
	 */
	if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);

}
384 
/*
 * icl "connect" method: establish an iser/RDMA connection to to_sa.
 *
 * Leftover state from any previous attempt on this icl_conn is
 * released first (reconnect path).  An RDMA CM id is created with
 * iser_cma_handler as callback; address resolution then drives the CM
 * state machine asynchronously while we sleep on up_cv until the
 * handler signals that the connection reached a final state.
 *
 * Returns 0 on success; a positive errno (EIO on failed establishment)
 * otherwise.
 */
int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
		int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	/* Tear down anything left from a previous connection attempt. */
	iser_conn_release(ic);

	sx_xlock(&iser_conn->state_mutex);
	 /* the device is known only --after-- address resolution */
	ib_conn->device = NULL;
	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn,
			RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	/* 1000 is the address-resolution timeout handed to the CM. */
	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		/* Normalize to a positive errno for the icl caller. */
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	/* state_mutex is dropped while asleep; the CMA handler wakes us. */
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	/* Connection is usable; make it visible on the global list. */
	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}
444 
445 int
446 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
447 		     struct ccb_scsiio *csio,
448 		     uint32_t *task_tagp, void **prvp)
449 {
450 	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
451 
452 	*prvp = ip;
453 	iser_pdu->csio = csio;
454 
455 	return (0);
456 }
457 
/*
 * icl "task done" method: release all per-task resources.  Unregisters
 * and unmaps the RDMA memory for whichever data directions were used,
 * unmaps the TX descriptor's header mapping (if still mapped), and
 * finally frees the pdu.
 */
void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_IN],
					 DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_OUT],
					 DMA_TO_DEVICE);
	}

	/* Undo the header mapping done in iser_initialize_headers(). */
	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}
488 
489 static int
490 iser_limits(struct icl_drv_limits *idl)
491 {
492 
493 	idl->idl_max_recv_data_segment_length = 128 * 1024;
494 	idl->idl_max_send_data_segment_length = 128 * 1024;
495 	idl->idl_max_burst_length = 262144;
496 	idl->idl_first_burst_length = 65536;
497 
498 	return (0);
499 }
500 
501 static int
502 icl_iser_load(void)
503 {
504 	int error;
505 
506 	ISER_DBG("Starting iSER datamover...");
507 
508 	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
509 				   NULL, NULL, NULL, NULL,
510 				   UMA_ALIGN_PTR, 0);
511 	/* FIXME: Check rc */
512 
513 	refcount_init(&icl_iser_ncons, 0);
514 
515 	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
516 	KASSERT(error == 0, ("failed to register iser"));
517 
518 	memset(&ig, 0, sizeof(struct iser_global));
519 
520 	/* device init is called only after the first addr resolution */
521 	sx_init(&ig.device_list_mutex,  "global_device_lock");
522 	INIT_LIST_HEAD(&ig.device_list);
523 	mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
524 	INIT_LIST_HEAD(&ig.connlist);
525 	sx_init(&ig.close_conns_mutex,  "global_close_conns_lock");
526 
527 	return (error);
528 }
529 
/*
 * Module unload: refuse while any connection still exists, otherwise
 * unregister the backend and tear down the global state created in
 * icl_iser_load().  Returns EBUSY or 0.
 */
static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	/* Live connections hold this count; see iser_new_conn/free. */
	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser", true);

	uma_zdestroy(icl_pdu_zone);

	return (0);
}
548 
549 static int
550 icl_iser_modevent(module_t mod, int what, void *arg)
551 {
552 	switch (what) {
553 	case MOD_LOAD:
554 		return (icl_iser_load());
555 	case MOD_UNLOAD:
556 		return (icl_iser_unload());
557 	default:
558 		return (EINVAL);
559 	}
560 }
561 
/* Module glue: event handler registration and module dependencies. */
moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
/* Requires the generic icl layer and the InfiniBand core. */
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);
572