/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

/* Translate IB_ACCESS_* flags into the hardware MLX4_PERM_* bits. */
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE : 0) |
	       (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW : 0) |
	       MLX4_PERM_LOCAL_READ;
}

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
	switch (type) {
	case IB_MW_TYPE_1:	return MLX4_MW_TYPE_1;
	case IB_MW_TYPE_2:	return MLX4_MW_TYPE_2;
	default:		return -1;
	}
}

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	struct ib_block_iter biter;
	int err, i = 0;
	u64 addr;

	rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
		addr = rdma_block_iter_dma_address(&biter);
		err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
		if (err)
			return err;
	}
	return 0;
}
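/*
 * Usage sketch (illustrative only, not additional driver code): a caller
 * that has allocated an MR with room for the umem's MTT entries typically
 * dumps the DMA blocks into the MTT and then enables the MR:
 *
 *	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
 *	if (!err)
 *		err = mlx4_mr_enable(dev->dev, &mr->mmr);
 *
 * This is exactly the sequence mlx4_ib_reg_user_mr() below follows.
 */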
static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
					u64 length, int access_flags)
{
	/*
	 * Force registering the memory as writable if the underlying pages
	 * are writable.  This is so rereg can change the access permissions
	 * from readable to writable without having to run through
	 * ib_umem_get again.
	 */
	if (!ib_access_writable(access_flags)) {
		unsigned long untagged_start = untagged_addr(start);
		struct vm_area_struct *vma;

		mmap_read_lock(current->mm);
		/*
		 * FIXME: Ideally this would iterate over all the vmas that
		 * cover the memory, but for now it requires a single vma to
		 * entirely cover the MR to support RO mappings.
		 */
		vma = find_vma(current->mm, untagged_start);
		if (vma && vma->vm_end >= untagged_start + length &&
		    vma->vm_start <= untagged_start) {
			if (vma->vm_flags & VM_WRITE)
				access_flags |= IB_ACCESS_LOCAL_WRITE;
		} else {
			access_flags |= IB_ACCESS_LOCAL_WRITE;
		}

		mmap_read_unlock(current->mm);
	}

	return ib_umem_get(device, start, length, access_flags);
}

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_dmah *dmah,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	if (dmah)
		return ERR_PTR(-EOPNOTSUPP);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	if (shift < 0) {
		err = shift;
		goto err_umem;
	}

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.page_size = 1U << shift;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
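/*
 * Illustrative user-space counterpart (libibverbs, not kernel code): the
 * registration path above is what ultimately services a call such as
 *
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_WRITE);
 *
 * once it reaches the driver through uverbs, with the IBV_ACCESS_* bits
 * ending up in convert_access() above.
 */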
struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
				    u64 length, u64 virt_addr,
				    int mr_access_flags, struct ib_pd *pd,
				    struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(mr->device);
	struct mlx4_ib_mr *mmr = to_mmr(mr);
	struct mlx4_mpt_entry *mpt_entry;
	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
	int err;

	/* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
	 * we assume that the calls can't run concurrently. Otherwise, a
	 * race exists.
	 */
	err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
	if (err)
		return ERR_PTR(err);

	if (flags & IB_MR_REREG_PD) {
		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
					   to_mpd(pd)->pdn);

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		if (ib_access_writable(mr_access_flags) &&
		    !mmr->umem->writable) {
			err = -EPERM;
			goto release_mpt_entry;
		}

		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
					       convert_access(mr_access_flags));

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_TRANS) {
		int shift;
		int n;

		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
		ib_umem_release(mmr->umem);
		mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
					     mr_access_flags);
		if (IS_ERR(mmr->umem)) {
			err = PTR_ERR(mmr->umem);
			/* Prevent mlx4_ib_dereg_mr from freeing an invalid pointer */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
		shift = PAGE_SHIFT;

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,
					      *pmpt_entry);
		if (err) {
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
		mmr->mmr.iova = virt_addr;
		mmr->mmr.size = length;

		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
		if (err) {
			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
	}

	/* If we couldn't transfer the MR to the HCA, just remember to
	 * return a failure. But dereg_mr will free the resources.
	 */
	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
	if (!err && flags & IB_MR_REREG_ACCESS)
		mmr->mmr.access = mr_access_flags;

release_mpt_entry:
	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
	if (err)
		return ERR_PTR(err);
	return NULL;
}

static int
mlx4_alloc_priv_pages(struct ib_device *device,
		      struct mlx4_ib_mr *mr,
		      int max_pages)
{
	int ret;

	/* Ensure that size is aligned to DMA cacheline
	 * requirements.
	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
	 * so page_map_size will never cross PAGE_SIZE.
	 */
	mr->page_map_size = roundup(max_pages * sizeof(u64),
				    MLX4_MR_PAGES_ALIGN);

	/* Prevent cross page boundary allocation. */
	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
	if (!mr->pages)
		return -ENOMEM;

	mr->page_map = dma_map_single(device->dev.parent, mr->pages,
				      mr->page_map_size, DMA_TO_DEVICE);

	if (dma_mapping_error(device->dev.parent, mr->page_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;

err:
	free_page((unsigned long)mr->pages);
	return ret;
}
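/*
 * Worked example for the sizing above (assuming the mlx4_ib.h values
 * MLX4_MAX_FAST_REG_PAGES == 511 and MLX4_MR_PAGES_ALIGN == 0x40): in the
 * worst case,
 *
 *	page_map_size = roundup(511 * sizeof(u64), 0x40)
 *		      = roundup(4088, 64) = 4096,
 *
 * so the page list always fits in the single zeroed page allocated by
 * get_zeroed_page() and can never straddle a page boundary.
 */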
static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
	if (mr->pages) {
		struct ib_device *device = mr->ibmr.device;

		dma_unmap_single(device->dev.parent, mr->page_map,
				 mr->page_map_size, DMA_TO_DEVICE);
		free_page((unsigned long)mr->pages);
		mr->pages = NULL;
	}
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int ret;

	mlx4_free_priv_pages(mr);

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret)
		return ret;
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
	struct mlx4_ib_mw *mw = to_mmw(ibmw);
	int err;

	err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
			    to_mlx4_type(ibmw->type), &mw->mmw);
	if (err)
		return err;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	ibmw->rkey = mw->mmw.key;
	return 0;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);
	return err;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	return 0;
}
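/*
 * Illustrative user-space counterpart (libibverbs, not kernel code): the
 * memory windows managed above back a call such as
 *
 *	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_2);
 *
 * once it reaches the driver through uverbs, with the window type
 * translated by to_mlx4_type() at the top of this file.
 */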
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_num_sg, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
	if (err)
		goto err_free_mr;

	mr->max_pages = max_num_sg;
	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_free_pl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free_pl:
	/* mlx4_free_priv_pages() reads the device from mr->ibmr */
	mr->ibmr.device = pd->device;
	mlx4_free_priv_pages(mr);
err_free_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

	return 0;
}

int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int rc;

	mr->npages = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
				   mr->page_map_size, DMA_TO_DEVICE);

	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
				      mr->page_map_size, DMA_TO_DEVICE);

	return rc;
}
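/*
 * Usage sketch for the fast-registration path above (illustrative ULP
 * code, not part of this driver; assumes a mapped scatterlist "sg" with
 * "nents" entries):
 *
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *	int n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
 *
 * followed by posting an IB_WR_REG_MR work request that carries mr.
 * ib_map_mr_sg() reaches mlx4_ib_map_mr_sg() above, which resets
 * mr->npages and fills mr->pages through mlx4_set_page().
 */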