1 /*- 2 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "icl_iser.h" 27 28 static struct fast_reg_descriptor * 29 iser_reg_desc_get(struct ib_conn *ib_conn) 30 { 31 struct fast_reg_descriptor *desc; 32 33 mtx_lock(&ib_conn->lock); 34 desc = list_first_entry(&ib_conn->fastreg.pool, 35 struct fast_reg_descriptor, list); 36 list_del(&desc->list); 37 mtx_unlock(&ib_conn->lock); 38 39 return (desc); 40 } 41 42 static void 43 iser_reg_desc_put(struct ib_conn *ib_conn, 44 struct fast_reg_descriptor *desc) 45 { 46 mtx_lock(&ib_conn->lock); 47 list_add(&desc->list, &ib_conn->fastreg.pool); 48 mtx_unlock(&ib_conn->lock); 49 } 50 51 #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) 52 53 /** 54 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned 55 * for RDMA sub-list of a scatter-gather list of memory buffers, and returns 56 * the number of entries which are aligned correctly. Supports the case where 57 * consecutive SG elements are actually fragments of the same physcial page. 58 */ 59 static int 60 iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev) 61 { 62 struct scatterlist *sg, *sgl, *next_sg = NULL; 63 u64 start_addr, end_addr; 64 int i, ret_len, start_check = 0; 65 66 if (data->dma_nents == 1) 67 return (1); 68 69 sgl = data->sgl; 70 start_addr = ib_sg_dma_address(ibdev, sgl); 71 72 for_each_sg(sgl, sg, data->dma_nents, i) { 73 if (start_check && !IS_4K_ALIGNED(start_addr)) 74 break; 75 76 next_sg = sg_next(sg); 77 if (!next_sg) 78 break; 79 80 end_addr = start_addr + ib_sg_dma_len(ibdev, sg); 81 start_addr = ib_sg_dma_address(ibdev, next_sg); 82 83 if (end_addr == start_addr) { 84 start_check = 0; 85 continue; 86 } else 87 start_check = 1; 88 89 if (!IS_4K_ALIGNED(end_addr)) 90 break; 91 } 92 ret_len = (next_sg) ? i : i+1; 93 94 return (ret_len); 95 } 96 97 void 98 iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu, 99 struct iser_data_buf *data, 100 enum dma_data_direction dir) 101 { 102 struct ib_device *dev; 103 104 dev = iser_pdu->iser_conn->ib_conn.device->ib_device; 105 ib_dma_unmap_sg(dev, data->sgl, data->size, dir); 106 } 107 108 static int 109 iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, 110 struct iser_mem_reg *reg) 111 { 112 struct scatterlist *sg = mem->sgl; 113 114 reg->sge.lkey = device->mr->lkey; 115 reg->rkey = device->mr->rkey; 116 reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); 117 reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); 118 119 return (0); 120 } 121 122 /** 123 * TODO: This should be a verb 124 * iser_ib_inc_rkey - increments the key portion of the given rkey. Can be used 125 * for calculating a new rkey for type 2 memory windows. 126 * @rkey - the rkey to increment. 127 */ 128 static inline u32 129 iser_ib_inc_rkey(u32 rkey) 130 { 131 const u32 mask = 0x000000ff; 132 133 return (((rkey + 1) & mask) | (rkey & ~mask)); 134 } 135 136 static void 137 iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) 138 { 139 u32 rkey; 140 141 memset(inv_wr, 0, sizeof(*inv_wr)); 142 inv_wr->opcode = IB_WR_LOCAL_INV; 143 inv_wr->wr_id = ISER_FASTREG_LI_WRID; 144 inv_wr->ex.invalidate_rkey = mr->rkey; 145 146 rkey = iser_ib_inc_rkey(mr->rkey); 147 ib_update_fast_reg_key(mr, rkey); 148 } 149 150 static int 151 iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu, 152 struct iser_data_buf *mem, 153 struct iser_reg_resources *rsc, 154 struct iser_mem_reg *reg) 155 { 156 struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn; 157 struct iser_device *device = ib_conn->device; 158 struct ib_mr *mr = rsc->mr; 159 struct ib_reg_wr fastreg_wr; 160 struct ib_send_wr inv_wr; 161 const struct ib_send_wr *bad_wr; 162 struct ib_send_wr *wr = NULL; 163 int ret, n; 164 165 /* if there a single dma entry, dma mr suffices */ 166 if (mem->dma_nents == 1) 167 return iser_reg_dma(device, mem, reg); 168 169 if (!rsc->mr_valid) { 170 iser_inv_rkey(&inv_wr, mr); 171 wr = &inv_wr; 172 } 173 174 n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); 175 if (unlikely(n != mem->size)) { 176 ISER_ERR("failed to map sg (%d/%d)\n", n, mem->size); 177 return n < 0 ? n : -EINVAL; 178 } 179 /* Prepare FASTREG WR */ 180 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 181 fastreg_wr.wr.opcode = IB_WR_REG_MR; 182 fastreg_wr.wr.wr_id = ISER_FASTREG_LI_WRID; 183 fastreg_wr.wr.num_sge = 0; 184 fastreg_wr.mr = mr; 185 fastreg_wr.key = mr->rkey; 186 fastreg_wr.access = IB_ACCESS_LOCAL_WRITE | 187 IB_ACCESS_REMOTE_WRITE | 188 IB_ACCESS_REMOTE_READ; 189 190 if (!wr) 191 wr = &fastreg_wr.wr; 192 else 193 wr->next = &fastreg_wr.wr; 194 195 ret = ib_post_send(ib_conn->qp, wr, &bad_wr); 196 if (ret) { 197 ISER_ERR("fast registration failed, ret:%d", ret); 198 return (ret); 199 } 200 rsc->mr_valid = 0; 201 202 reg->sge.lkey = mr->lkey; 203 reg->rkey = mr->rkey; 204 reg->sge.addr = mr->iova; 205 reg->sge.length = mr->length; 206 207 return (ret); 208 } 209 210 /** 211 * iser_reg_rdma_mem - Registers memory intended for RDMA, 212 * using Fast Registration WR (if possible) obtaining rkey and va 213 * 214 * returns 0 on success, errno code on failure 215 */ 216 int 217 iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu, 218 enum iser_data_dir cmd_dir) 219 { 220 struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn; 221 struct iser_device *device = ib_conn->device; 222 struct ib_device *ibdev = device->ib_device; 223 struct iser_data_buf *mem = &iser_pdu->data[cmd_dir]; 224 struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir]; 225 struct fast_reg_descriptor *desc = NULL; 226 int err, aligned_len; 227 228 aligned_len = iser_data_buf_aligned_len(mem, ibdev); 229 if (aligned_len != mem->dma_nents) { 230 ISER_ERR("bounce buffer is not supported"); 231 return 1; 232 } 233 234 if (mem->dma_nents != 1) { 235 desc = iser_reg_desc_get(ib_conn); 236 mem_reg->mem_h = desc; 237 } 238 239 err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL, 240 mem_reg); 241 if (err) 242 goto err_reg; 243 244 return (0); 245 246 err_reg: 247 if (desc) 248 iser_reg_desc_put(ib_conn, desc); 249 250 return (err); 251 } 252 253 void 254 iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu, 255 enum iser_data_dir cmd_dir) 256 { 257 struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir]; 258 259 if (!reg->mem_h) 260 return; 261 262 iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn, 263 reg->mem_h); 264 reg->mem_h = NULL; 265 } 266 267 int 268 iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu, 269 struct iser_data_buf *data, 270 enum iser_data_dir iser_dir, 271 enum dma_data_direction dma_dir) 272 { 273 struct ib_device *dev; 274 275 iser_pdu->dir[iser_dir] = 1; 276 dev = iser_pdu->iser_conn->ib_conn.device->ib_device; 277 278 data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir); 279 if (data->dma_nents == 0) { 280 ISER_ERR("dma_map_sg failed"); 281 return (EINVAL); 282 } 283 284 return (0); 285 } 286