/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");
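/*
 * The knob above is reachable as kern.iser.debug; CTLFLAG_RWTUN makes it
 * settable both at runtime and as a loader tunable, e.g.:
 *
 *	sysctl kern.iser.debug=1
 *	echo 'kern.iser.debug="1"' >> /boot/loader.conf
 */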

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int	icl_iser_ncons;
struct iser_global ig;

static icl_conn_new_pdu_t	iser_conn_new_pdu;
static icl_conn_pdu_free_t	iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t	iser_conn_pdu_queue;
static icl_conn_handoff_t	iser_conn_handoff;
static icl_conn_free_t		iser_conn_free;
static icl_conn_close_t		iser_conn_close;
static icl_conn_release_t	iser_conn_release;
static icl_conn_connect_t	iser_conn_connect;
static icl_conn_connected_t	iser_conn_connected;
static icl_conn_task_setup_t	iser_conn_task_setup;
static icl_conn_task_done_t	iser_conn_task_done;
static icl_conn_pdu_get_data_t	iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_release, iser_conn_release),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_connected, iser_conn_connected),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
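
/*
 * icl(4) dispatches per-connection calls through the kobj interface
 * generated from icl_conn_if.m, so, for example, ICL_CONN_PDU_QUEUE(ic, ip)
 * on a connection created by this class resolves to iser_conn_pdu_queue()
 * below.
 */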

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs, so for TMFs the caller should hold the state
 * mutex to avoid dereferencing an IB device that the racing teardown
 * sequence may already have destroyed.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
				ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey   = device->mr->lkey;

out:
	return (ret);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
			  const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	uint8_t opcode = request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK;

	/*
	 * Compare the masked opcode for equality; bitwise-ANDing the opcode
	 * against the opcode constants would also match unrelated opcodes.
	 */
	if (opcode == ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    opcode == ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
		       size_t off, void *addr, size_t len)
{
	/*
	 * If we have received data, copy it to the upper layer buffer.
	 * iSER keeps a plain buffer pointer (not an mbuf chain) in
	 * ip_data_mbuf, so index it in bytes rather than in
	 * sizeof(struct mbuf) units.
	 */
	if (ip->ip_data_mbuf)
		memcpy(addr, (const uint8_t *)ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS for the caller to fill in.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

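/*
 * The BHS DataSegmentLength field is a 24-bit big-endian quantity;
 * reassemble it one byte at a time.
 */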
size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
				  const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

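/*
 * Classify a PDU by opcode: login, logout, NOP-Out and text requests go
 * out through the control path, while SCSI commands take the command
 * path (see iser_conn_pdu_queue() below).
 */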
static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
	    M_ICL_ISER, M_WAITOK | M_ZERO);
	if (iser_conn == NULL) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_driver = strdup("iser", M_TEMP);
	ic->ic_iser = true;

	return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	mtx_destroy(&iser_conn->ib_conn.lock);	/* pairs with iser_new_conn() */
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int cmds_max)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started",
			 iser_conn, iser_conn->state);
		goto out;
	}

	/*
	 * In a discovery session there is no need to allocate rx
	 * descriptors or to post receive work requests.
	 */
	if (ic->ic_session_type_discovery(ic))
		goto out;

	error = iser_alloc_rx_descriptors(iser_conn, cmds_max);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}

/**
 * Free all connection resources.
 */
void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list; it may be
	 * missing if connection establishment failed.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * Whether we are reconnecting or removing the session, we need to
	 * release the IB resources (which is safe to do more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
}

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * If the iser connection is still waiting on its condition
	 * variable (state PENDING) and we close it before establishment
	 * has completed, signal it so the connection is released properly.
	 */
	if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

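/*
 * Establish the RDMA connection.  Note that rdma_resolve_addr() only
 * kicks off the asynchronous rdma_cm state machine; iser_cma_handler()
 * handles the subsequent address/route/connect events and signals
 * up_cv once the connection reaches ISER_CONN_UP or fails.
 */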
int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
		int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	sx_xlock(&iser_conn->state_mutex);
	/* The device is known only after address resolution. */
	ib_conn->device = NULL;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
			RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}

/**
 * Called with the session spinlock held.  There is no need to take the
 * state mutex for an advisory check.
 */
bool
iser_conn_connected(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	return (iser_conn->state == ISER_CONN_UP);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
		     uint32_t *task_tagp, void **prvp, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_IN],
					 DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_OUT],
					 DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

static u_int32_t
iser_hba_misc(void)
{
	return (PIM_UNMAPPED);
}

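/*
 * Report the maximum data segment length supported by this backend to
 * icl(4); larger transfers are split up by the upper layers.
 */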
static int
iser_limits(size_t *limitp)
{
	*limitp = 128 * 1024;

	return (0);
}

static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
				   NULL, NULL, NULL, NULL,
				   UMA_ALIGN_PTR, 0);
	if (icl_pdu_zone == NULL)
		return (ENOMEM);

	refcount_init(&icl_iser_ncons, 0);

	error = icl_register("iser", 0, iser_limits, iser_new_conn, iser_hba_misc);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* Device init is called only after the first address resolution. */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
}

static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser");

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, iscsi, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);