/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Data size is stored in
 *  task->data[ISER_DIR_IN].data_len; protection size
 *  is stored in task->prot[ISER_DIR_IN].data_len.
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Data size is stored in
 *  task->data[ISER_DIR_OUT].data_len; protection size
 *  is stored in task->prot[ISER_DIR_OUT].data_len.
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-out RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags     |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* creates a new tx descriptor and adds header regd buffer */
void
iser_create_send_desc(struct iser_conn *iser_conn,
		      struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_req_dma,
				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_resp_dma,
				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

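/*
 * Allocate a single contiguous login buffer: the first
 * ISCSI_DEF_MAX_RECV_SEG_LEN bytes serve as the login request area and the
 * following ISER_RX_LOGIN_SIZE bytes as the login response area.  Both
 * regions are DMA-mapped here and released by iser_free_login_buf().
 */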
int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);

	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf  = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
				    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}

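/*
 * Allocate the fast registration pool and cmds_max receive descriptors for
 * the connection.  Each descriptor is DMA-mapped and its SGE is filled in so
 * it can be posted to the receive queue; min_posted_rx is set to a quarter
 * of the queue depth and is used as the repost batch size.
 */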
int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge       *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
				sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
				M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

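/*
 * Build a scatterlist for a KVA buffer by splitting it into segments that do
 * not cross page boundaries; data_buf->size ends up holding the number of
 * scatterlist entries.
 */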
static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)((uintptr_t)buf + len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

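/*
 * Build a scatterlist from the bio's VM page array; only the first segment
 * may start at a non-zero page offset (bio_ma_offset).
 */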
static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

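/*
 * Convert the CCB data buffer to a scatterlist, dispatching on how CAM
 * delivered the data (unmapped bio pages vs. a plain KVA buffer).
 */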
static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch ((ccbh->flags & CAM_DATA_MASK)) {
		case CAM_DATA_BIO:
			iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
			break;
		case CAM_DATA_VADDR:
			/*
			 * Support KVA buffers for various scsi commands such as:
			 *  - REPORT_LUNS
			 *  - MODE_SENSE_6
			 *  - INQUIRY
			 *  - SERVICE_ACTION_IN
			 * The data of these commands is always mapped into KVA.
			 */
			iser_buf_to_sg(csio->data_ptr, data_buf);
			break;
		default:
			ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
			err = EINVAL;
	}
	return (err);
}

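/*
 * Only request a signaled send completion once every ISER_SIGNAL_CMD_COUNT
 * commands, which keeps the send completion rate down.
 */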
static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}

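/*
 * Send a SCSI command PDU: build the tx descriptor, convert the CCB data
 * buffer to a scatterlist, register it for RDMA in the direction implied by
 * the iSCSI R/W flags, and post the send (signaled only periodically, see
 * iser_signal_comp()).
 */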
int
iser_send_command(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
			hdr->bhssc_initiator_task_tag,
			hdr->bhssc_expected_data_transfer_length,
			err);
	return (err);
}

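/*
 * Send a control-type PDU (e.g. login/text).  Any immediate data is carried
 * from the login request buffer as a second SGE; until the connection is
 * handed off, a login receive buffer is posted for the reply.
 */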
int
iser_send_control(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		ib_dma_sync_single_for_cpu(device->ib_device,
				iser_conn->login_req_dma, datalen,
				DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
			iser_conn->login_req_dma, datalen,
			DMA_TO_DEVICE);

		tx_dsg->addr    = iser_conn->login_req_dma;
		tx_dsg->length  = datalen;
		tx_dsg->lkey    = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* For login phase and discovery session we re-use the login buffer */
	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);
}

/**
 * iser_rcv_completion - recv DTO completion
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
		    unsigned long rx_xfer_len,
		    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate between login and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, assign the ip_data_mbuf
	 * to the rx_buffer - later we'll copy it to upper layer buffers
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed
	 * in parallel to the execution of iser_conn_term, so the code that
	 * waits for the posted rx bufs refcount to become zero handles
	 * everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

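	/*
	 * Replenish the receive queue in batches of min_posted_rx once the
	 * number of posted buffers has dropped far enough below the queue
	 * depth.
	 */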
	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

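/*
 * Send completion handler: only control PDUs are freed here on completion of
 * their send; command PDUs remain owned by the task until it completes.
 */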
void
iser_snd_completion(struct iser_tx_desc *tx_desc,
		    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}