xref: /linux/drivers/infiniband/ulp/iser/iser_memory.c (revision e8d235d4d8fb8957bae5f6ed4521115203a00d8b)
1 /*
2  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *	- Redistributions of source code must retain the above
16  *	  copyright notice, this list of conditions and the following
17  *	  disclaimer.
18  *
19  *	- Redistributions in binary form must reproduce the above
20  *	  copyright notice, this list of conditions and the following
21  *	  disclaimer in the documentation and/or other materials
22  *	  provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 #include <linux/module.h>
34 #include <linux/kernel.h>
35 #include <linux/slab.h>
36 #include <linux/mm.h>
37 #include <linux/highmem.h>
38 #include <linux/scatterlist.h>
39 
40 #include "iscsi_iser.h"
41 
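/*
 * iser_free_bounce_sg() - release the bounce buffer pages and the bounce
 * scatterlist, then restore the original scatterlist and size that were
 * saved by iser_alloc_bounce_sg().
 */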
42 static void
43 iser_free_bounce_sg(struct iser_data_buf *data)
44 {
45 	struct scatterlist *sg;
46 	int count;
47 
48 	for_each_sg(data->sg, sg, data->size, count)
49 		__free_page(sg_page(sg));
50 
51 	kfree(data->sg);
52 
53 	data->sg = data->orig_sg;
54 	data->size = data->orig_size;
55 	data->orig_sg = NULL;
56 	data->orig_size = 0;
57 }
58 
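/*
 * iser_alloc_bounce_sg() - build a bounce scatterlist of freshly allocated
 * pages large enough to hold data_len bytes (GFP_ATOMIC), saving the
 * original scatterlist in orig_sg/orig_size so it can be restored later.
 */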
59 static int
60 iser_alloc_bounce_sg(struct iser_data_buf *data)
61 {
62 	struct scatterlist *sg;
63 	struct page *page;
64 	unsigned long length = data->data_len;
65 	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
66 
67 	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
68 	if (!sg)
69 		goto err;
70 
71 	sg_init_table(sg, nents);
72 	while (length) {
73 		u32 page_len = min_t(u32, length, PAGE_SIZE);
74 
75 		page = alloc_page(GFP_ATOMIC);
76 		if (!page)
77 			goto err;
78 
79 		sg_set_page(&sg[i], page, page_len, 0);
80 		length -= page_len;
81 		i++;
82 	}
83 
84 	data->orig_sg = data->sg;
85 	data->orig_size = data->size;
86 	data->sg = sg;
87 	data->size = nents;
88 
89 	return 0;
90 
91 err:
92 	for (; i > 0; i--)
93 		__free_page(sg_page(&sg[i - 1]));
94 	kfree(sg);
95 
96 	return -ENOMEM;
97 }
98 
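/*
 * iser_copy_bounce() - copy data between the original scatterlist and the
 * bounce scatterlist using kmap_atomic mappings; to_buffer selects the
 * direction (true: original -> bounce, false: bounce -> original).
 */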
99 static void
100 iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
101 {
102 	struct scatterlist *osg, *bsg = data->sg;
103 	void *oaddr, *baddr;
104 	unsigned int left = data->data_len;
105 	unsigned int bsg_off = 0;
106 	int i;
107 
108 	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
109 		unsigned int copy_len, osg_off = 0;
110 
111 		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
112 		copy_len = min(left, osg->length);
113 		while (copy_len) {
114 			unsigned int len = min(copy_len, bsg->length - bsg_off);
115 
116 			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
117 			if (to_buffer)
118 				memcpy(baddr + bsg_off, oaddr + osg_off, len);
119 			else
120 				memcpy(oaddr + osg_off, baddr + bsg_off, len);
121 
122 			kunmap_atomic(baddr - bsg->offset);
123 			osg_off += len;
124 			bsg_off += len;
125 			copy_len -= len;
126 
127 			if (bsg_off >= bsg->length) {
128 				bsg = sg_next(bsg);
129 				bsg_off = 0;
130 			}
131 		}
132 		kunmap_atomic(oaddr - osg->offset);
133 		left -= osg_off;
134 	}
135 }
136 
137 static inline void
138 iser_copy_from_bounce(struct iser_data_buf *data)
139 {
140 	iser_copy_bounce(data, false);
141 }
142 
143 static inline void
144 iser_copy_to_bounce(struct iser_data_buf *data)
145 {
146 	iser_copy_bounce(data, true);
147 }
148 
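/*
 * iser_reg_desc_get() - take a fast registration descriptor from the
 * connection's pool under the connection lock (the pool is assumed
 * to be non-empty).
 */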
149 struct fast_reg_descriptor *
150 iser_reg_desc_get(struct ib_conn *ib_conn)
151 {
152 	struct fast_reg_descriptor *desc;
153 	unsigned long flags;
154 
155 	spin_lock_irqsave(&ib_conn->lock, flags);
156 	desc = list_first_entry(&ib_conn->fastreg.pool,
157 				struct fast_reg_descriptor, list);
158 	list_del(&desc->list);
159 	spin_unlock_irqrestore(&ib_conn->lock, flags);
160 
161 	return desc;
162 }
163 
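/*
 * iser_reg_desc_put() - return a fast registration descriptor to the
 * connection's pool under the connection lock.
 */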
164 void
165 iser_reg_desc_put(struct ib_conn *ib_conn,
166 		  struct fast_reg_descriptor *desc)
167 {
168 	unsigned long flags;
169 
170 	spin_lock_irqsave(&ib_conn->lock, flags);
171 	list_add(&desc->list, &ib_conn->fastreg.pool);
172 	spin_unlock_irqrestore(&ib_conn->lock, flags);
173 }
174 
175 /**
176  * iser_start_rdma_unaligned_sg - bounce an unaligned SG and DMA map the bounce buffer
177  */
178 static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
179 					struct iser_data_buf *data,
180 					enum iser_data_dir cmd_dir)
181 {
182 	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
183 	int rc;
184 
185 	rc = iser_alloc_bounce_sg(data);
186 	if (rc) {
187 		iser_err("Failed to allocate bounce for data len %lu\n",
188 			 data->data_len);
189 		return rc;
190 	}
191 
192 	if (cmd_dir == ISER_DIR_OUT)
193 		iser_copy_to_bounce(data);
194 
195 	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
196 					(cmd_dir == ISER_DIR_OUT) ?
197 					DMA_TO_DEVICE : DMA_FROM_DEVICE);
198 	if (!data->dma_nents) {
199 		iser_err("Got dma_nents %d, something went wrong...\n",
200 			 data->dma_nents);
201 		rc = -ENOMEM;
202 		goto err;
203 	}
204 
205 	return 0;
206 err:
207 	iser_free_bounce_sg(data);
208 	return rc;
209 }
210 
211 /**
212  * iser_finalize_rdma_unaligned_sg - DMA unmap the bounce buffer, copy the
213  * data back to the original SG for reads, and free the bounce buffer
214  */
215 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
216 				     struct iser_data_buf *data,
217 				     enum iser_data_dir cmd_dir)
218 {
219 	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
220 
221 	ib_dma_unmap_sg(dev, data->sg, data->size,
222 			(cmd_dir == ISER_DIR_OUT) ?
223 			DMA_TO_DEVICE : DMA_FROM_DEVICE);
224 
225 	if (cmd_dir == ISER_DIR_IN)
226 		iser_copy_from_bounce(data);
227 
228 	iser_free_bounce_sg(data);
229 }
230 
231 #define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
232 
233 /**
234  * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
235  * and returns the length of the resulting physical address array (may be less
236  * than the original due to possible compaction).
237  *
238  * We build a "page vec" under the assumption that the SG meets the RDMA
239  * alignment requirements. Other than the first and last SG elements, all
240  * the "internal" elements can be compacted into a list whose elements are
241  * dma addresses of physical pages. The code also supports the case where
242  * a few fragments of the same page are present in the SG as consecutive
243  * elements, and it handles a single-entry SG as well.
244  */
245 
246 static int iser_sg_to_page_vec(struct iser_data_buf *data,
247 			       struct ib_device *ibdev, u64 *pages,
248 			       int *offset, int *data_size)
249 {
250 	struct scatterlist *sg, *sgl = data->sg;
251 	u64 start_addr, end_addr, page, chunk_start = 0;
252 	unsigned long total_sz = 0;
253 	unsigned int dma_len;
254 	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
255 
256 	/* compute the offset of first element */
257 	*offset = (u64) sgl[0].offset & ~MASK_4K;
258 
259 	new_chunk = 1;
260 	cur_page  = 0;
261 	for_each_sg(sgl, sg, data->dma_nents, i) {
262 		start_addr = ib_sg_dma_address(ibdev, sg);
263 		if (new_chunk)
264 			chunk_start = start_addr;
265 		dma_len = ib_sg_dma_len(ibdev, sg);
266 		end_addr = start_addr + dma_len;
267 		total_sz += dma_len;
268 
269 		/* collect page fragments until aligned or end of SG list */
270 		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
271 			new_chunk = 0;
272 			continue;
273 		}
274 		new_chunk = 1;
275 
276 		/* address of the first page in the contiguous chunk;
277 		 * the masking is relevant only for the very first SG
278 		 * entry, which might be unaligned */
279 		page = chunk_start & MASK_4K;
280 		do {
281 			pages[cur_page++] = page;
282 			page += SIZE_4K;
283 		} while (page < end_addr);
284 	}
285 
286 	*data_size = total_sz;
287 	iser_dbg("page_vec->data_size:%d cur_page %d\n",
288 		 *data_size, cur_page);
289 	return cur_page;
290 }
291 
292 
293 /**
294  * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
295  * scatter-gather list that is correctly aligned for RDMA, and returns the
296  * number of entries which are aligned correctly. Supports the case where
297  * consecutive SG elements are actually fragments of the same physical page.
298  */
299 static int iser_data_buf_aligned_len(struct iser_data_buf *data,
300 				      struct ib_device *ibdev)
301 {
302 	struct scatterlist *sg, *sgl, *next_sg = NULL;
303 	u64 start_addr, end_addr;
304 	int i, ret_len, start_check = 0;
305 
306 	if (data->dma_nents == 1)
307 		return 1;
308 
309 	sgl = data->sg;
310 	start_addr  = ib_sg_dma_address(ibdev, sgl);
311 
312 	for_each_sg(sgl, sg, data->dma_nents, i) {
313 		if (start_check && !IS_4K_ALIGNED(start_addr))
314 			break;
315 
316 		next_sg = sg_next(sg);
317 		if (!next_sg)
318 			break;
319 
320 		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
321 		start_addr  = ib_sg_dma_address(ibdev, next_sg);
322 
323 		if (end_addr == start_addr) {
324 			start_check = 0;
325 			continue;
326 		} else
327 			start_check = 1;
328 
329 		if (!IS_4K_ALIGNED(end_addr))
330 			break;
331 	}
332 	ret_len = (next_sg) ? i : i+1;
333 	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
334 		 ret_len, data->dma_nents, data);
335 	return ret_len;
336 }
337 
338 static void iser_data_buf_dump(struct iser_data_buf *data,
339 			       struct ib_device *ibdev)
340 {
341 	struct scatterlist *sg;
342 	int i;
343 
344 	for_each_sg(data->sg, sg, data->dma_nents, i)
345 		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
346 			 "off:0x%x sz:0x%x dma_len:0x%x\n",
347 			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
348 			 sg_page(sg), sg->offset,
349 			 sg->length, ib_sg_dma_len(ibdev, sg));
350 }
351 
352 static void iser_dump_page_vec(struct iser_page_vec *page_vec)
353 {
354 	int i;
355 
356 	iser_err("page vec length %d data size %d\n",
357 		 page_vec->length, page_vec->data_size);
358 	for (i = 0; i < page_vec->length; i++)
359 		iser_err("%d %lx\n", i, (unsigned long)page_vec->pages[i]);
360 }
361 
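/*
 * iser_dma_map_task_data() - mark the task's data direction and DMA map its
 * scatterlist; returns -EINVAL if ib_dma_map_sg() maps zero entries.
 */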
362 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
363 			    struct iser_data_buf *data,
364 			    enum iser_data_dir iser_dir,
365 			    enum dma_data_direction dma_dir)
366 {
367 	struct ib_device *dev;
368 
369 	iser_task->dir[iser_dir] = 1;
370 	dev = iser_task->iser_conn->ib_conn.device->ib_device;
371 
372 	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
373 	if (data->dma_nents == 0) {
374 		iser_err("dma_map_sg failed!!!\n");
375 		return -EINVAL;
376 	}
377 	return 0;
378 }
379 
380 void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
381 			      struct iser_data_buf *data,
382 			      enum dma_data_direction dir)
383 {
384 	struct ib_device *dev;
385 
386 	dev = iser_task->iser_conn->ib_conn.device->ib_device;
387 	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
388 }
389 
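/*
 * iser_reg_dma() - describe a single-entry scatterlist using the device's
 * global DMA memory region, so no FMR or fast registration is needed.
 */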
390 static int
391 iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
392 	     struct iser_mem_reg *reg)
393 {
394 	struct scatterlist *sg = mem->sg;
395 
396 	reg->sge.lkey = device->mr->lkey;
397 	reg->rkey = device->mr->rkey;
398 	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
399 	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
400 
401 	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
402 		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
403 		 reg->sge.addr, reg->sge.length);
404 
405 	return 0;
406 }
407 
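/*
 * fall_to_bounce_buf() - handle an SG that violates the RDMA alignment
 * requirements: unmap the original SG, allocate a bounce buffer (copying
 * the data first for writes), and DMA map the bounce buffer instead.
 */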
408 static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
409 			      struct iser_data_buf *mem,
410 			      enum iser_data_dir cmd_dir,
411 			      int aligned_len)
412 {
413 	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
414 	struct iser_device *device = iser_task->iser_conn->ib_conn.device;
415 
416 	iscsi_conn->fmr_unalign_cnt++;
417 	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
418 		  aligned_len, mem->size);
419 
420 	if (iser_debug_level > 0)
421 		iser_data_buf_dump(mem, device->ib_device);
422 
423 	/* unmap the command data before accessing it */
424 	iser_dma_unmap_task_data(iser_task, mem,
425 				 (cmd_dir == ISER_DIR_OUT) ?
426 				 DMA_TO_DEVICE : DMA_FROM_DEVICE);
427 
428 	/* allocate a copy buffer; if we are writing, copy the unaligned
429 	 * scatterlist into it, then DMA map the copy */
430 	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
431 		return -ENOMEM;
432 
433 	return 0;
434 }
435 
436 /**
437  * iser_reg_page_vec - Register the physical pages of a memory buffer via the FMR pool
438  *
439  * returns: 0 on success, errno code on failure
440  */
441 static
442 int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
443 		      struct iser_data_buf *mem,
444 		      struct iser_page_vec *page_vec,
445 		      struct iser_mem_reg *mem_reg)
446 {
447 	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
448 	struct iser_device *device = ib_conn->device;
449 	struct ib_pool_fmr *fmr;
450 	int ret, plen;
451 
452 	plen = iser_sg_to_page_vec(mem, device->ib_device,
453 				   page_vec->pages,
454 				   &page_vec->offset,
455 				   &page_vec->data_size);
456 	page_vec->length = plen;
457 	if (plen * SIZE_4K < page_vec->data_size) {
458 		iser_err("page vec too short to hold this SG\n");
459 		iser_data_buf_dump(mem, device->ib_device);
460 		iser_dump_page_vec(page_vec);
461 		return -EINVAL;
462 	}
463 
464 	fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
465 				    page_vec->pages,
466 				    page_vec->length,
467 				    page_vec->pages[0]);
468 	if (IS_ERR(fmr)) {
469 		ret = PTR_ERR(fmr);
470 		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
471 		return ret;
472 	}
473 
474 	mem_reg->sge.lkey = fmr->fmr->lkey;
475 	mem_reg->rkey = fmr->fmr->rkey;
476 	mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
477 	mem_reg->sge.length = page_vec->data_size;
478 	mem_reg->mem_h = fmr;
479 
480 	return 0;
481 }
482 
483 /**
484  * Unregister memory previously registered using FMR.
485  * If the memory was not registered via FMR, this does nothing.
486  */
487 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
488 			enum iser_data_dir cmd_dir)
489 {
490 	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
491 	int ret;
492 
493 	if (!reg->mem_h)
494 		return;
495 
496 	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
497 
498 	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
499 	if (ret)
500 		iser_err("ib_fmr_pool_unmap failed %d\n", ret);
501 
502 	reg->mem_h = NULL;
503 }
504 
505 void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
506 			    enum iser_data_dir cmd_dir)
507 {
508 	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
509 
510 	if (!reg->mem_h)
511 		return;
512 
513 	iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
514 			  reg->mem_h);
515 	reg->mem_h = NULL;
516 }
517 
518 /**
519  * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA
520  * using FMR (if possible), obtaining an rkey and va
521  *
522  * returns 0 on success, errno code on failure
523  */
524 int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
525 			  enum iser_data_dir cmd_dir)
526 {
527 	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
528 	struct iser_device   *device = ib_conn->device;
529 	struct ib_device     *ibdev = device->ib_device;
530 	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
531 	struct iser_mem_reg *mem_reg;
532 	int aligned_len;
533 	int err;
534 	int i;
535 
536 	mem_reg = &iser_task->rdma_reg[cmd_dir];
537 
538 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
539 	if (aligned_len != mem->dma_nents) {
540 		err = fall_to_bounce_buf(iser_task, mem,
541 					 cmd_dir, aligned_len);
542 		if (err) {
543 			iser_err("failed to allocate bounce buffer\n");
544 			return err;
545 		}
546 	}
547 
548 	/* if there is a single DMA entry, FMR is not needed */
549 	if (mem->dma_nents == 1) {
550 		return iser_reg_dma(device, mem, mem_reg);
551 	} else { /* use FMR for multiple dma entries */
552 		err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
553 					mem_reg);
554 		if (err && err != -EAGAIN) {
555 			iser_data_buf_dump(mem, ibdev);
556 			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
557 				 mem->dma_nents,
558 				 ntoh24(iser_task->desc.iscsi_header.dlength));
559 			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
560 				 ib_conn->fmr.page_vec->data_size,
561 				 ib_conn->fmr.page_vec->length,
562 				 ib_conn->fmr.page_vec->offset);
563 			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
564 				iser_err("page_vec[%d] = 0x%llx\n", i,
565 					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
566 		}
567 		if (err)
568 			return err;
569 	}
570 	return 0;
571 }
572 
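/*
 * iser_set_dif_domain() - fill a T10-DIF signature domain from the SCSI
 * command: protection interval, reference tag, and (currently hard-coded)
 * application tag mask and escape settings.
 */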
573 static void
574 iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
575 		    struct ib_sig_domain *domain)
576 {
577 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
578 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
579 	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
580 	/*
581 	 * At the moment we hard code those, but in the future
582 	 * we will take them from sc.
583 	 */
584 	domain->sig.dif.apptag_check_mask = 0xffff;
585 	domain->sig.dif.app_escape = true;
586 	domain->sig.dif.ref_escape = true;
587 	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
588 		domain->sig.dif.ref_remap = true;
589 };
590 
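/*
 * iser_set_sig_attrs() - set up the wire and memory signature domains
 * according to the SCSI protection operation (insert/strip/pass).
 */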
591 static int
592 iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
593 {
594 	switch (scsi_get_prot_op(sc)) {
595 	case SCSI_PROT_WRITE_INSERT:
596 	case SCSI_PROT_READ_STRIP:
597 		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
598 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
599 		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
600 		break;
601 	case SCSI_PROT_READ_INSERT:
602 	case SCSI_PROT_WRITE_STRIP:
603 		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
604 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
605 		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
606 						IB_T10DIF_CSUM : IB_T10DIF_CRC;
607 		break;
608 	case SCSI_PROT_READ_PASS:
609 	case SCSI_PROT_WRITE_PASS:
610 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
611 		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
612 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
613 		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
614 						IB_T10DIF_CSUM : IB_T10DIF_CRC;
615 		break;
616 	default:
617 		iser_err("Unsupported PI operation %d\n",
618 			 scsi_get_prot_op(sc));
619 		return -EINVAL;
620 	}
621 
622 	return 0;
623 }
624 
625 static inline void
626 iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
627 {
628 	*mask = 0;
629 	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
630 		*mask |= ISER_CHECK_REFTAG;
631 	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
632 		*mask |= ISER_CHECK_GUARD;
633 }
634 
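/*
 * iser_inv_rkey() - prepare a local invalidate work request for the MR and
 * advance its rkey so the next registration uses a fresh key.
 */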
635 static void
636 iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
637 {
638 	u32 rkey;
639 
640 	memset(inv_wr, 0, sizeof(*inv_wr));
641 	inv_wr->opcode = IB_WR_LOCAL_INV;
642 	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
643 	inv_wr->ex.invalidate_rkey = mr->rkey;
644 
645 	rkey = ib_inc_rkey(mr->rkey);
646 	ib_update_fast_reg_key(mr, rkey);
647 }
648 
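/*
 * iser_reg_sig_mr() - post a signature handover work request that binds the
 * data (and, if present, protection) registrations to the signature MR,
 * invalidating the signature rkey first if it is still valid.
 */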
649 static int
650 iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
651 		struct fast_reg_descriptor *desc,
652 		struct iser_mem_reg *data_reg,
653 		struct iser_mem_reg *prot_reg,
654 		struct iser_mem_reg *sig_reg)
655 {
656 	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
657 	struct iser_pi_context *pi_ctx = desc->pi_ctx;
658 	struct ib_send_wr sig_wr, inv_wr;
659 	struct ib_send_wr *bad_wr, *wr = NULL;
660 	struct ib_sig_attrs sig_attrs;
661 	int ret;
662 
663 	memset(&sig_attrs, 0, sizeof(sig_attrs));
664 	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
665 	if (ret)
666 		goto err;
667 
668 	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
669 
670 	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
671 		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
672 		wr = &inv_wr;
673 	}
674 
675 	memset(&sig_wr, 0, sizeof(sig_wr));
676 	sig_wr.opcode = IB_WR_REG_SIG_MR;
677 	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
678 	sig_wr.sg_list = &data_reg->sge;
679 	sig_wr.num_sge = 1;
680 	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
681 	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
682 	if (scsi_prot_sg_count(iser_task->sc))
683 		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
684 	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
685 					      IB_ACCESS_REMOTE_READ |
686 					      IB_ACCESS_REMOTE_WRITE;
687 
688 	if (!wr)
689 		wr = &sig_wr;
690 	else
691 		wr->next = &sig_wr;
692 
693 	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
694 	if (ret) {
695 		iser_err("reg_sig_mr failed, ret:%d\n", ret);
696 		goto err;
697 	}
698 	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
699 
700 	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
701 	sig_reg->rkey = pi_ctx->sig_mr->rkey;
702 	sig_reg->sge.addr = 0;
703 	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
704 
705 	iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
706 		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
707 		 sig_reg->sge.length);
708 err:
709 	return ret;
710 }
711 
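/*
 * iser_fast_reg_mr() - register a memory buffer with a fast registration
 * work request; a single-entry SG is short-circuited to iser_reg_dma().
 */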
712 static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
713 			    struct iser_data_buf *mem,
714 			    struct fast_reg_descriptor *desc,
715 			    enum iser_reg_indicator ind,
716 			    struct iser_mem_reg *reg)
717 {
718 	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
719 	struct iser_device *device = ib_conn->device;
720 	struct ib_mr *mr;
721 	struct ib_fast_reg_page_list *frpl;
722 	struct ib_send_wr fastreg_wr, inv_wr;
723 	struct ib_send_wr *bad_wr, *wr = NULL;
724 	int ret, offset, size, plen;
725 
726 	/* if there is a single DMA entry, the DMA MR suffices */
727 	if (mem->dma_nents == 1)
728 		return iser_reg_dma(device, mem, reg);
729 
730 	if (ind == ISER_DATA_KEY_VALID) {
731 		mr = desc->data_mr;
732 		frpl = desc->data_frpl;
733 	} else {
734 		mr = desc->pi_ctx->prot_mr;
735 		frpl = desc->pi_ctx->prot_frpl;
736 	}
737 
738 	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
739 				   &offset, &size);
740 	if (plen * SIZE_4K < size) {
741 		iser_err("fast reg page_list too short to hold this SG\n");
742 		return -EINVAL;
743 	}
744 
745 	if (!(desc->reg_indicators & ind)) {
746 		iser_inv_rkey(&inv_wr, mr);
747 		wr = &inv_wr;
748 	}
749 
750 	/* Prepare FASTREG WR */
751 	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
752 	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
753 	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
754 	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
755 	fastreg_wr.wr.fast_reg.page_list = frpl;
756 	fastreg_wr.wr.fast_reg.page_list_len = plen;
757 	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
758 	fastreg_wr.wr.fast_reg.length = size;
759 	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
760 	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
761 					       IB_ACCESS_REMOTE_WRITE |
762 					       IB_ACCESS_REMOTE_READ);
763 
764 	if (!wr)
765 		wr = &fastreg_wr;
766 	else
767 		wr->next = &fastreg_wr;
768 
769 	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
770 	if (ret) {
771 		iser_err("fast registration failed, ret:%d\n", ret);
772 		return ret;
773 	}
774 	desc->reg_indicators &= ~ind;
775 
776 	reg->sge.lkey = mr->lkey;
777 	reg->rkey = mr->rkey;
778 	reg->sge.addr = frpl->page_list[0] + offset;
779 	reg->sge.length = size;
780 
781 	return ret;
782 }
783 
784 /**
785  * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA
786  * using a Fast Registration WR (if possible), obtaining an rkey and va
787  *
788  * returns 0 on success, errno code on failure
789  */
790 int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
791 			      enum iser_data_dir cmd_dir)
792 {
793 	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
794 	struct iser_device *device = ib_conn->device;
795 	struct ib_device *ibdev = device->ib_device;
796 	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
797 	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
798 	struct fast_reg_descriptor *desc = NULL;
799 	int err, aligned_len;
800 
801 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
802 	if (aligned_len != mem->dma_nents) {
803 		err = fall_to_bounce_buf(iser_task, mem,
804 					 cmd_dir, aligned_len);
805 		if (err) {
806 			iser_err("failed to allocate bounce buffer\n");
807 			return err;
808 		}
809 	}
810 
811 	if (mem->dma_nents != 1 ||
812 	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
813 		desc = iser_reg_desc_get(ib_conn);
814 		mem_reg->mem_h = desc;
815 	}
816 
817 	err = iser_fast_reg_mr(iser_task, mem, desc,
818 			       ISER_DATA_KEY_VALID, mem_reg);
819 	if (err)
820 		goto err_reg;
821 
822 	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
823 		struct iser_mem_reg prot_reg;
824 
825 		memset(&prot_reg, 0, sizeof(prot_reg));
826 		if (scsi_prot_sg_count(iser_task->sc)) {
827 			mem = &iser_task->prot[cmd_dir];
828 			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
829 			if (aligned_len != mem->dma_nents) {
830 				err = fall_to_bounce_buf(iser_task, mem,
831 							 cmd_dir, aligned_len);
832 				if (err) {
833 					iser_err("failed to allocate bounce buffer\n");
834 					return err;
835 				}
836 			}
837 
838 			err = iser_fast_reg_mr(iser_task, mem, desc,
839 					       ISER_PROT_KEY_VALID, &prot_reg);
840 			if (err)
841 				goto err_reg;
842 		}
843 
844 		err = iser_reg_sig_mr(iser_task, desc, mem_reg,
845 				      &prot_reg, mem_reg);
846 		if (err) {
847 			iser_err("Failed to register signature mr\n");
848 			return err;
849 		}
850 		desc->reg_indicators |= ISER_FASTREG_PROTECTED;
851 	}
852 
853 	return 0;
854 err_reg:
855 	if (desc)
856 		iser_reg_desc_put(ib_conn, desc);
857 
858 	return err;
859 }
860