/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

static u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
               (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
               MLX4_PERM_LOCAL_READ;
}

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
        switch (type) {
        case IB_MW_TYPE_1:      return MLX4_MW_TYPE_1;
        case IB_MW_TYPE_2:      return MLX4_MW_TYPE_2;
        default:                return -1;
        }
}

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx4_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
                            ~0ull, convert_access(acc), 0, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                           struct ib_umem *umem)
{
        struct ib_block_iter biter;
        int err, i = 0;
        u64 addr;

        rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
                addr = rdma_block_iter_dma_address(&biter);
                err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
                if (err)
                        return err;
        }
        return 0;
}

static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
                                        u64 length, int access_flags)
{
        /*
         * Force registering the memory as writable if the underlying pages
         * are writable.
         * This is so rereg can change the access permissions
         * from readable to writable without having to run through
         * ib_umem_get again.
         */
        if (!ib_access_writable(access_flags)) {
                unsigned long untagged_start = untagged_addr(start);
                struct vm_area_struct *vma;

                mmap_read_lock(current->mm);
                /*
                 * FIXME: Ideally this would iterate over all the vmas that
                 * cover the memory, but for now it requires a single vma to
                 * entirely cover the MR to support RO mappings.
                 */
                vma = find_vma(current->mm, untagged_start);
                if (vma && vma->vm_end >= untagged_start + length &&
                    vma->vm_start <= untagged_start) {
                        if (vma->vm_flags & VM_WRITE)
                                access_flags |= IB_ACCESS_LOCAL_WRITE;
                } else {
                        access_flags |= IB_ACCESS_LOCAL_WRITE;
                }

                mmap_read_unlock(current->mm);
        }

        return ib_umem_get(device, start, length, access_flags);
}

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_dmah *dmah,
                                  struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int shift;
        int err;
        int n;

        if (dmah)
                return ERR_PTR(-EOPNOTSUPP);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err_free;
        }

        shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
        if (shift < 0) {
                err = shift;
                goto err_umem;
        }

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
                            convert_access(access_flags), n, shift, &mr->mmr);
        if (err)
                goto err_umem;

        err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
        if (err)
                goto err_mr;

        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.page_size = 1U << shift;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
        ib_umem_release(mr->umem);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
                                    u64 length, u64 virt_addr,
                                    int mr_access_flags, struct ib_pd *pd,
                                    struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(mr->device);
        struct mlx4_ib_mr *mmr = to_mmr(mr);
        struct mlx4_mpt_entry *mpt_entry;
        struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
        int err;

        /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
         * we assume that the calls can't run concurrently. Otherwise, a
         * race exists.
         */
        err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
        if (err)
                return ERR_PTR(err);

        if (flags & IB_MR_REREG_PD) {
                err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
                                           to_mpd(pd)->pdn);

                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_ACCESS) {
                if (ib_access_writable(mr_access_flags) &&
                    !mmr->umem->writable) {
                        err = -EPERM;
                        goto release_mpt_entry;
                }

                err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
                                               convert_access(mr_access_flags));

                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_TRANS) {
                int shift;
                int n;

                mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                ib_umem_release(mmr->umem);
                mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
                                             mr_access_flags);
                if (IS_ERR(mmr->umem)) {
                        err = PTR_ERR(mmr->umem);
                        /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
                        mmr->umem = NULL;
                        goto release_mpt_entry;
                }
                n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
                shift = PAGE_SHIFT;

                err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
                                              virt_addr, length, n, shift,
                                              *pmpt_entry);
                if (err) {
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
                mmr->mmr.iova = virt_addr;
                mmr->mmr.size = length;

                err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
                if (err) {
                        mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
        }

        /* If we couldn't transfer the MR to the HCA, just remember to
         * return a failure. But dereg_mr will free the resources.
         */
        err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
        if (!err && flags & IB_MR_REREG_ACCESS)
                mmr->mmr.access = mr_access_flags;

release_mpt_entry:
        mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
        if (err)
                return ERR_PTR(err);
        return NULL;
}

static int
mlx4_alloc_priv_pages(struct ib_device *device,
                      struct mlx4_ib_mr *mr,
                      int max_pages)
{
        int ret;

        /* Ensure that size is aligned to DMA cacheline
         * requirements.
         * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
         * so page_map_size will never cross PAGE_SIZE.
         */
        mr->page_map_size = roundup(max_pages * sizeof(u64),
                                    MLX4_MR_PAGES_ALIGN);

        /* Prevent cross page boundary allocation. */
        mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
        if (!mr->pages)
                return -ENOMEM;

        mr->page_map = dma_map_single(device->dev.parent, mr->pages,
                                      mr->page_map_size, DMA_TO_DEVICE);

        if (dma_mapping_error(device->dev.parent, mr->page_map)) {
                ret = -ENOMEM;
                goto err;
        }

        return 0;

err:
        free_page((unsigned long)mr->pages);
        return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
        if (mr->pages) {
                struct ib_device *device = mr->ibmr.device;

                dma_unmap_single(device->dev.parent, mr->page_map,
                                 mr->page_map_size, DMA_TO_DEVICE);
                free_page((unsigned long)mr->pages);
                mr->pages = NULL;
        }
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int ret;

        mlx4_free_priv_pages(mr);

        ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
        if (ret)
                return ret;
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);

        return 0;
}

int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
        struct mlx4_ib_mw *mw = to_mmw(ibmw);
        int err;

        err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
                            to_mlx4_type(ibmw->type), &mw->mmw);
        if (err)
                return err;

        err = mlx4_mw_enable(dev->dev, &mw->mmw);
        if (err)
                goto err_mw;

        ibmw->rkey = mw->mmw.key;
        return 0;

err_mw:
        mlx4_mw_free(dev->dev, &mw->mmw);
        return err;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
        struct mlx4_ib_mw *mw = to_mmw(ibmw);

        mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
        return 0;
}

struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
                               u32 max_num_sg)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG ||
            max_num_sg > MLX4_MAX_FAST_REG_PAGES)
                return ERR_PTR(-EINVAL);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
                            max_num_sg, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
        if (err)
                goto err_free_mr;

        mr->max_pages = max_num_sg;
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_free_pl;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free_pl:
        mr->ibmr.device = pd->device;
        mlx4_free_priv_pages(mr);
err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}

static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);

        if (unlikely(mr->npages == mr->max_pages))
                return -ENOMEM;

        mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

        return 0;
}

int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                      unsigned int *sg_offset)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int rc;

        mr->npages = 0;

        ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
                                   mr->page_map_size, DMA_TO_DEVICE);

        rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

        ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
                                      mr->page_map_size, DMA_TO_DEVICE);

        return rc;
}