xref: /freebsd/sys/dev/iser/iser_memory.c (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;

	mtx_lock(&ib_conn->lock);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	mtx_unlock(&ib_conn->lock);

	return (desc);
}

static void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	mtx_lock(&ib_conn->lock);
	list_add(&desc->list, &ib_conn->fastreg.pool);
	mtx_unlock(&ib_conn->lock);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
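
/*
 * Note (illustration, not part of the original comments): MASK_4K is expected
 * to keep the page-frame bits of an address (i.e. ~(SIZE_4K - 1), as defined
 * in icl_iser.h), so ~MASK_4K selects the offset within a 4K page.  For
 * example, 0x10002000 passes this check while 0x10002200 does not, and
 * "addr & MASK_4K" below rounds an address down to its 4K page boundary.
 */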

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements.  Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages.  The code also supports the odd case
 * where a few fragments of the same page appear in the SG as consecutive
 * elements, and it handles a single-entry SG.
 */
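/*
 * Worked example (hypothetical addresses, for illustration only): a 3-entry
 * SG whose DMA segments are [0x10000800, len 0x800], [0x10001000, len 0x2000]
 * and [0x10003000, len 0x400] is acceptable, since only the first segment
 * starts and only the last one ends off a 4K boundary.  The routine would
 * report *offset = 0x800 (the first element's intra-page offset),
 * *data_size = 0x2c00, fill pages[] with 0x10000000, 0x10001000, 0x10002000
 * and 0x10003000, and return 4.
 */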
static int
iser_sg_to_page_vec(struct iser_data_buf *data,
		    struct ib_device *ibdev, u64 *pages,
		    int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sgl;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;

	return (cur_page);
}

/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports the
 * case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
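/*
 * Illustration (hypothetical addresses): an SG such as
 * [0x10000000, len 0x1000], [0x20000800, len 0x1000], ... breaks at the
 * second element, whose start is neither contiguous with the previous end
 * nor 4K aligned, so only 1 entry is reported as usable and
 * iser_reg_rdma_mem() below rejects the request.
 */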
static int
iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return (1);

	sgl = data->sgl;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;

	return (ret_len);
}

void
iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu,
			 struct iser_data_buf *data,
			 enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sgl, data->size, dir);
}

static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sgl;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);

	return (0);
}

/**
 * TODO: This should be a verb
 * iser_ib_inc_rkey - increments the key portion of the given rkey. Can be used
 * for calculating a new rkey for type 2 memory windows.
 * @rkey - the rkey to increment.
 */
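/*
 * Example (values for illustration only): only the low byte -- the consumer
 * "key" portion of the rkey -- is incremented, so 0x00123401 becomes
 * 0x00123402 and 0x001234ff wraps around to 0x00123400; the upper 24 bits
 * are left untouched.
 */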
static inline u32
iser_ib_inc_rkey(u32 rkey)
{
	const u32 mask = 0x000000ff;

	return (((rkey + 1) & mask) | (rkey & ~mask));
}

static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = iser_ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

static int
iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu,
		 struct iser_data_buf *mem,
		 struct iser_reg_resources *rsc,
		 struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1)
		return (iser_reg_dma(device, mem, reg));

	/* rsc is not null */
	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   rsc->frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		ISER_ERR("fast reg page_list too short to hold this SG");
		return (EINVAL);
	}

	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, rsc->mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = rsc->frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = rsc->frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = rsc->mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		ISER_ERR("fast registration failed, ret:%d", ret);
		return (ret);
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = rsc->mr->lkey;
	reg->rkey = rsc->mr->rkey;
	reg->sge.addr = rsc->frpl->page_list[0] + offset;
	reg->sge.length = size;

	return (ret);
}

/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA, using a Fast
 * Registration WR (if possible), obtaining rkey and va
 *
 * Returns 0 on success, errno code on failure
 */
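/*
 * Flow sketch (derived from the code below, not part of the original
 * comments): the task's SG list must already satisfy the RDMA alignment
 * rules, since bounce buffers are not supported.  A single-entry SG is
 * served by the device-wide DMA MR inside iser_fast_reg_mr(), while a
 * multi-entry SG first pulls a fast_reg_descriptor from the connection's
 * pool and then posts a FASTREG work request.
 */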
int
iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_pdu->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		ISER_ERR("bounce buffer is not supported");
		return (1);
	}

	if (mem->dma_nents != 1) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL,
			       mem_reg);
	if (err)
		goto err_reg;

	return (0);

err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return (err);
}

void
iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

int
iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu,
		       struct iser_data_buf *data,
		       enum iser_data_dir iser_dir,
		       enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_pdu->dir[iser_dir] = 1;
	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir);
	if (data->dma_nents == 0) {
		ISER_ERR("dma_map_sg failed");
		return (EINVAL);
	}

	return (0);
}
}
349