/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;

	mtx_lock(&ib_conn->lock);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	mtx_unlock(&ib_conn->lock);

	return (desc);
}

static void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	mtx_lock(&ib_conn->lock);
	list_add(&desc->list, &ib_conn->fastreg.pool);
	mtx_unlock(&ib_conn->lock);
}

#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the case where
 * several fragments of the same page are present in the SG as consecutive
 * elements. It also handles a single-entry SG.
 */
static int
iser_sg_to_page_vec(struct iser_data_buf *data,
		    struct ib_device *ibdev, u64 *pages,
		    int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sgl;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;

	return (cur_page);
}
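
/*
 * Illustrative walk-through of iser_sg_to_page_vec() above (all addresses
 * and lengths are made up, assuming MASK_4K/SIZE_4K describe 4KB pages):
 * for a DMA-mapped SG list of three entries
 *
 *     sg[0]: dma addr 0x10000200, len 0x0e00  (page offset 0x200, ends 4K aligned)
 *     sg[1]: dma addr 0x10001000, len 0x2000  (ends 4K aligned)
 *     sg[2]: dma addr 0x10003000, len 0x0100  (last entry, may end unaligned)
 *
 * the routine sets *offset = 0x200 and *data_size = 0x2f00, fills pages[]
 * with { 0x10000000, 0x10001000, 0x10002000, 0x10003000 } and returns 4.
 */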

/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers which is correctly aligned for RDMA,
 * and returns the number of entries in that sub-list. Supports the case
 * where consecutive SG elements are actually fragments of the same physical
 * page.
 */
static int
iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return (1);

	sgl = data->sgl;
	start_addr = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i + 1;

	return (ret_len);
}

void
iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu,
			 struct iser_data_buf *data,
			 enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sgl, data->size, dir);
}

static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sgl;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);

	return (0);
}

/**
 * TODO: This should be a verb.
 * iser_ib_inc_rkey - increments the key portion of the given rkey. Can be
 * used for calculating a new rkey for type 2 memory windows.
 * @rkey - the rkey to increment.
 */
static inline u32
iser_ib_inc_rkey(u32 rkey)
{
	const u32 mask = 0x000000ff;

	return (((rkey + 1) & mask) | (rkey & ~mask));
}
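
/*
 * For example (values are illustrative only), iser_ib_inc_rkey() above maps
 * 0x12345678 to 0x12345679 and wraps 0x123456ff around to 0x12345600: only
 * the low 8-bit key portion changes, while the upper bits identifying the
 * memory region are preserved.
 */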

static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = iser_ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

static int
iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu,
		 struct iser_data_buf *mem,
		 struct iser_reg_resources *rsc,
		 struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1)
		return (iser_reg_dma(device, mem, reg));

	/* rsc is not null */
	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   rsc->frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		ISER_ERR("fast reg page_list too short to hold this SG");
		return (EINVAL);
	}

	/* invalidate the previous registration before reusing the MR */
	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, rsc->mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = rsc->frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = rsc->frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = rsc->mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	/* chain the optional LOCAL_INV in front of the FASTREG WR */
	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		ISER_ERR("fast registration failed, ret:%d", ret);
		return (ret);
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = rsc->mr->lkey;
	reg->rkey = rsc->mr->rkey;
	reg->sge.addr = rsc->frpl->page_list[0] + offset;
	reg->sge.length = size;

	return (ret);
}
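
/*
 * Continuing the illustrative SG example given after iser_sg_to_page_vec():
 * with plen = 4, offset = 0x200 and size = 0x2f00, the FASTREG WR above is
 * posted with iova_start = 0x10000000 + 0x200, page_shift = SHIFT_4K and
 * length = 0x2f00, and the caller is handed back reg->sge.addr = 0x10000200,
 * reg->sge.length = 0x2f00 and the current rkey of rsc->mr.
 */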

/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA, using a Fast
 * Registration WR (if possible), obtaining an rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int
iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_pdu->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		ISER_ERR("bounce buffer is not supported");
		return (1);
	}

	if (mem->dma_nents != 1) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL,
			       mem_reg);
	if (err)
		goto err_reg;

	return (0);

err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return (err);
}

void
iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

int
iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu,
		       struct iser_data_buf *data,
		       enum iser_data_dir iser_dir,
		       enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_pdu->dir[iser_dir] = 1;
	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir);
	if (data->dma_nents == 0) {
		ISER_ERR("dma_map_sg failed");
		return (EINVAL);
	}

	return (0);
}
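
/*
 * Expected life cycle of a task's data buffer (a sketch only; the actual
 * call sites live in the icl_iser front-end, and anything beyond
 * map-before-register / unregister-before-unmap is an assumption):
 *
 *     iser_dma_map_task_data()    - DMA-map the scatterlist
 *     iser_reg_rdma_mem()         - obtain the rkey/address advertised to the target
 *     ... RDMA transfer ...
 *     iser_unreg_rdma_mem()       - return the fast-reg descriptor to the pool
 *     iser_dma_unmap_task_data()  - DMA-unmap the scatterlist
 */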