1 /* 2 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <config.h> 34 35 #include <signal.h> 36 #include <sys/ipc.h> 37 #include <sys/shm.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <errno.h> 41 42 #include "mlx5.h" 43 #include "bitmap.h" 44 45 static int mlx5_bitmap_init(struct mlx5_bitmap *bitmap, uint32_t num, 46 uint32_t mask) 47 { 48 bitmap->last = 0; 49 bitmap->top = 0; 50 bitmap->max = num; 51 bitmap->avail = num; 52 bitmap->mask = mask; 53 bitmap->avail = bitmap->max; 54 bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(uint32_t)); 55 if (!bitmap->table) 56 return -ENOMEM; 57 58 return 0; 59 } 60 61 static void bitmap_free_range(struct mlx5_bitmap *bitmap, uint32_t obj, 62 int cnt) 63 { 64 int i; 65 66 obj &= bitmap->max - 1; 67 68 for (i = 0; i < cnt; i++) 69 mlx5_clear_bit(obj + i, bitmap->table); 70 bitmap->last = min(bitmap->last, obj); 71 bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; 72 bitmap->avail += cnt; 73 } 74 75 static int bitmap_empty(struct mlx5_bitmap *bitmap) 76 { 77 return (bitmap->avail == bitmap->max) ? 1 : 0; 78 } 79 80 static int bitmap_avail(struct mlx5_bitmap *bitmap) 81 { 82 return bitmap->avail; 83 } 84 85 static void mlx5_bitmap_cleanup(struct mlx5_bitmap *bitmap) 86 { 87 if (bitmap->table) 88 free(bitmap->table); 89 } 90 91 static void free_huge_mem(struct mlx5_hugetlb_mem *hmem) 92 { 93 mlx5_bitmap_cleanup(&hmem->bitmap); 94 if (shmdt(hmem->shmaddr) == -1) 95 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); 96 shmctl(hmem->shmid, IPC_RMID, NULL); 97 free(hmem); 98 } 99 100 static int mlx5_bitmap_alloc(struct mlx5_bitmap *bitmap) 101 { 102 uint32_t obj; 103 int ret; 104 105 obj = mlx5_find_first_zero_bit(bitmap->table, bitmap->max); 106 if (obj < bitmap->max) { 107 mlx5_set_bit(obj, bitmap->table); 108 bitmap->last = (obj + 1); 109 if (bitmap->last == bitmap->max) 110 bitmap->last = 0; 111 obj |= bitmap->top; 112 ret = obj; 113 } else 114 ret = -1; 115 116 if (ret != -1) 117 --bitmap->avail; 118 119 return ret; 120 } 121 122 static uint32_t find_aligned_range(unsigned long *bitmap, 123 uint32_t start, uint32_t nbits, 124 int len, int alignment) 125 { 126 uint32_t end, i; 127 128 again: 129 start = align(start, alignment); 130 131 while ((start < nbits) && mlx5_test_bit(start, bitmap)) 132 start += alignment; 133 134 if (start >= nbits) 135 return -1; 136 137 end = start + len; 138 if (end > nbits) 139 return -1; 140 141 for (i = start + 1; i < end; i++) { 142 if (mlx5_test_bit(i, bitmap)) { 143 start = i + 1; 144 goto again; 145 } 146 } 147 148 return start; 149 } 150 151 static int bitmap_alloc_range(struct mlx5_bitmap *bitmap, int cnt, 152 int align) 153 { 154 uint32_t obj; 155 int ret, i; 156 157 if (cnt == 1 && align == 1) 158 return mlx5_bitmap_alloc(bitmap); 159 160 if (cnt > bitmap->max) 161 return -1; 162 163 obj = find_aligned_range(bitmap->table, bitmap->last, 164 bitmap->max, cnt, align); 165 if (obj >= bitmap->max) { 166 bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; 167 obj = find_aligned_range(bitmap->table, 0, bitmap->max, 168 cnt, align); 169 } 170 171 if (obj < bitmap->max) { 172 for (i = 0; i < cnt; i++) 173 mlx5_set_bit(obj + i, bitmap->table); 174 if (obj == bitmap->last) { 175 bitmap->last = (obj + cnt); 176 if (bitmap->last >= bitmap->max) 177 bitmap->last = 0; 178 } 179 obj |= bitmap->top; 180 ret = obj; 181 } else 182 ret = -1; 183 184 if (ret != -1) 185 bitmap->avail -= cnt; 186 187 return obj; 188 } 189 190 static struct mlx5_hugetlb_mem *alloc_huge_mem(size_t size) 191 { 192 struct mlx5_hugetlb_mem *hmem; 193 size_t shm_len; 194 195 hmem = malloc(sizeof(*hmem)); 196 if (!hmem) 197 return NULL; 198 199 shm_len = align(size, MLX5_SHM_LENGTH); 200 hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); 201 if (hmem->shmid == -1) { 202 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); 203 goto out_free; 204 } 205 206 hmem->shmaddr = shmat(hmem->shmid, MLX5_SHM_ADDR, MLX5_SHMAT_FLAGS); 207 if (hmem->shmaddr == (void *)-1) { 208 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); 209 goto out_rmid; 210 } 211 212 if (mlx5_bitmap_init(&hmem->bitmap, shm_len / MLX5_Q_CHUNK_SIZE, 213 shm_len / MLX5_Q_CHUNK_SIZE - 1)) { 214 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); 215 goto out_shmdt; 216 } 217 218 /* 219 * Marked to be destroyed when process detaches from shmget segment 220 */ 221 shmctl(hmem->shmid, IPC_RMID, NULL); 222 223 return hmem; 224 225 out_shmdt: 226 if (shmdt(hmem->shmaddr) == -1) 227 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); 228 229 out_rmid: 230 shmctl(hmem->shmid, IPC_RMID, NULL); 231 232 out_free: 233 free(hmem); 234 return NULL; 235 } 236 237 static int alloc_huge_buf(struct mlx5_context *mctx, struct mlx5_buf *buf, 238 size_t size, int page_size) 239 { 240 int found = 0; 241 int nchunk; 242 struct mlx5_hugetlb_mem *hmem; 243 int ret; 244 245 buf->length = align(size, MLX5_Q_CHUNK_SIZE); 246 nchunk = buf->length / MLX5_Q_CHUNK_SIZE; 247 248 mlx5_spin_lock(&mctx->hugetlb_lock); 249 TAILQ_FOREACH(hmem, &mctx->hugetlb_list, entry) { 250 if (bitmap_avail(&hmem->bitmap)) { 251 buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); 252 if (buf->base != -1) { 253 buf->hmem = hmem; 254 found = 1; 255 break; 256 } 257 } 258 } 259 mlx5_spin_unlock(&mctx->hugetlb_lock); 260 261 if (!found) { 262 hmem = alloc_huge_mem(buf->length); 263 if (!hmem) 264 return -1; 265 266 buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); 267 if (buf->base == -1) { 268 free_huge_mem(hmem); 269 /* TBD: remove after proven stability */ 270 fprintf(stderr, "BUG: huge allocation\n"); 271 return -1; 272 } 273 274 buf->hmem = hmem; 275 276 mlx5_spin_lock(&mctx->hugetlb_lock); 277 if (bitmap_avail(&hmem->bitmap)) 278 TAILQ_INSERT_HEAD(&mctx->hugetlb_list, hmem, entry); 279 else 280 TAILQ_INSERT_TAIL(&mctx->hugetlb_list, hmem, entry); 281 mlx5_spin_unlock(&mctx->hugetlb_lock); 282 } 283 284 buf->buf = hmem->shmaddr + buf->base * MLX5_Q_CHUNK_SIZE; 285 286 ret = ibv_dontfork_range(buf->buf, buf->length); 287 if (ret) { 288 mlx5_dbg(stderr, MLX5_DBG_CONTIG, "\n"); 289 goto out_fork; 290 } 291 buf->type = MLX5_ALLOC_TYPE_HUGE; 292 293 return 0; 294 295 out_fork: 296 mlx5_spin_lock(&mctx->hugetlb_lock); 297 bitmap_free_range(&hmem->bitmap, buf->base, nchunk); 298 if (bitmap_empty(&hmem->bitmap)) { 299 TAILQ_REMOVE(&mctx->hugetlb_list, hmem, entry); 300 mlx5_spin_unlock(&mctx->hugetlb_lock); 301 free_huge_mem(hmem); 302 } else 303 mlx5_spin_unlock(&mctx->hugetlb_lock); 304 305 return -1; 306 } 307 308 static void free_huge_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) 309 { 310 int nchunk; 311 312 nchunk = buf->length / MLX5_Q_CHUNK_SIZE; 313 mlx5_spin_lock(&ctx->hugetlb_lock); 314 bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk); 315 if (bitmap_empty(&buf->hmem->bitmap)) { 316 TAILQ_REMOVE(&ctx->hugetlb_list, buf->hmem, entry); 317 mlx5_spin_unlock(&ctx->hugetlb_lock); 318 free_huge_mem(buf->hmem); 319 } else 320 mlx5_spin_unlock(&ctx->hugetlb_lock); 321 } 322 323 int mlx5_alloc_prefered_buf(struct mlx5_context *mctx, 324 struct mlx5_buf *buf, 325 size_t size, int page_size, 326 enum mlx5_alloc_type type, 327 const char *component) 328 { 329 int ret; 330 331 /* 332 * Fallback mechanism priority: 333 * huge pages 334 * contig pages 335 * default 336 */ 337 if (type == MLX5_ALLOC_TYPE_HUGE || 338 type == MLX5_ALLOC_TYPE_PREFER_HUGE || 339 type == MLX5_ALLOC_TYPE_ALL) { 340 ret = alloc_huge_buf(mctx, buf, size, page_size); 341 if (!ret) 342 return 0; 343 344 if (type == MLX5_ALLOC_TYPE_HUGE) 345 return -1; 346 347 mlx5_dbg(stderr, MLX5_DBG_CONTIG, 348 "Huge mode allocation failed, fallback to %s mode\n", 349 MLX5_ALLOC_TYPE_ALL ? "contig" : "default"); 350 } 351 352 if (type == MLX5_ALLOC_TYPE_CONTIG || 353 type == MLX5_ALLOC_TYPE_PREFER_CONTIG || 354 type == MLX5_ALLOC_TYPE_ALL) { 355 ret = mlx5_alloc_buf_contig(mctx, buf, size, page_size, component); 356 if (!ret) 357 return 0; 358 359 if (type == MLX5_ALLOC_TYPE_CONTIG) 360 return -1; 361 mlx5_dbg(stderr, MLX5_DBG_CONTIG, 362 "Contig allocation failed, fallback to default mode\n"); 363 } 364 365 return mlx5_alloc_buf(buf, size, page_size); 366 367 } 368 369 int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) 370 { 371 int err = 0; 372 373 switch (buf->type) { 374 case MLX5_ALLOC_TYPE_ANON: 375 mlx5_free_buf(buf); 376 break; 377 378 case MLX5_ALLOC_TYPE_HUGE: 379 free_huge_buf(ctx, buf); 380 break; 381 382 case MLX5_ALLOC_TYPE_CONTIG: 383 mlx5_free_buf_contig(ctx, buf); 384 break; 385 default: 386 fprintf(stderr, "Bad allocation type\n"); 387 } 388 389 return err; 390 } 391 392 /* This function computes log2(v) rounded up. 393 We don't want to have a dependency to libm which exposes ceil & log2 APIs. 394 Code was written based on public domain code: 395 URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. 396 */ 397 static uint32_t mlx5_get_block_order(uint32_t v) 398 { 399 static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; 400 static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; 401 int i; 402 uint32_t input_val = v; 403 404 register uint32_t r = 0;/* result of log2(v) will go here */ 405 for (i = 4; i >= 0; i--) { 406 if (v & bits_arr[i]) { 407 v >>= shift_arr[i]; 408 r |= shift_arr[i]; 409 } 410 } 411 /* Rounding up if required */ 412 r += !!(input_val & ((1 << r) - 1)); 413 414 return r; 415 } 416 417 void mlx5_get_alloc_type(const char *component, 418 enum mlx5_alloc_type *alloc_type, 419 enum mlx5_alloc_type default_type) 420 421 { 422 char *env_value; 423 char name[128]; 424 425 snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); 426 427 *alloc_type = default_type; 428 429 env_value = getenv(name); 430 if (env_value) { 431 if (!strcasecmp(env_value, "ANON")) 432 *alloc_type = MLX5_ALLOC_TYPE_ANON; 433 else if (!strcasecmp(env_value, "HUGE")) 434 *alloc_type = MLX5_ALLOC_TYPE_HUGE; 435 else if (!strcasecmp(env_value, "CONTIG")) 436 *alloc_type = MLX5_ALLOC_TYPE_CONTIG; 437 else if (!strcasecmp(env_value, "PREFER_CONTIG")) 438 *alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG; 439 else if (!strcasecmp(env_value, "PREFER_HUGE")) 440 *alloc_type = MLX5_ALLOC_TYPE_PREFER_HUGE; 441 else if (!strcasecmp(env_value, "ALL")) 442 *alloc_type = MLX5_ALLOC_TYPE_ALL; 443 } 444 } 445 446 static void mlx5_alloc_get_env_info(int *max_block_log, 447 int *min_block_log, 448 const char *component) 449 450 { 451 char *env; 452 int value; 453 char name[128]; 454 455 /* First set defaults */ 456 *max_block_log = MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE; 457 *min_block_log = MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE; 458 459 snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); 460 env = getenv(name); 461 if (env) { 462 value = atoi(env); 463 if (value <= MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE && 464 value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE) 465 *max_block_log = value; 466 else 467 fprintf(stderr, "Invalid value %d for %s\n", 468 value, name); 469 } 470 sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); 471 env = getenv(name); 472 if (env) { 473 value = atoi(env); 474 if (value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE && 475 value <= *max_block_log) 476 *min_block_log = value; 477 else 478 fprintf(stderr, "Invalid value %d for %s\n", 479 value, name); 480 } 481 } 482 483 int mlx5_alloc_buf_contig(struct mlx5_context *mctx, 484 struct mlx5_buf *buf, size_t size, 485 int page_size, 486 const char *component) 487 { 488 void *addr = MAP_FAILED; 489 int block_size_exp; 490 int max_block_log; 491 int min_block_log; 492 struct ibv_context *context = &mctx->ibv_ctx; 493 off_t offset; 494 495 mlx5_alloc_get_env_info(&max_block_log, 496 &min_block_log, 497 component); 498 499 block_size_exp = mlx5_get_block_order(size); 500 501 if (block_size_exp > max_block_log) 502 block_size_exp = max_block_log; 503 504 do { 505 offset = 0; 506 set_command(MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD, &offset); 507 set_order(block_size_exp, &offset); 508 addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, 509 context->cmd_fd, page_size * offset); 510 if (addr != MAP_FAILED) 511 break; 512 513 /* 514 * The kernel returns EINVAL if not supported 515 */ 516 if (errno == EINVAL) 517 return -1; 518 519 block_size_exp -= 1; 520 } while (block_size_exp >= min_block_log); 521 mlx5_dbg(mctx->dbg_fp, MLX5_DBG_CONTIG, "block order %d, addr %p\n", 522 block_size_exp, addr); 523 524 if (addr == MAP_FAILED) 525 return -1; 526 527 if (ibv_dontfork_range(addr, size)) { 528 munmap(addr, size); 529 return -1; 530 } 531 532 buf->buf = addr; 533 buf->length = size; 534 buf->type = MLX5_ALLOC_TYPE_CONTIG; 535 536 return 0; 537 } 538 539 void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf) 540 { 541 ibv_dofork_range(buf->buf, buf->length); 542 munmap(buf->buf, buf->length); 543 } 544 545 int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size) 546 { 547 int ret; 548 int al_size; 549 550 al_size = align(size, page_size); 551 ret = posix_memalign(&buf->buf, page_size, al_size); 552 if (ret) 553 return ret; 554 555 ret = ibv_dontfork_range(buf->buf, al_size); 556 if (ret) 557 free(buf->buf); 558 559 if (!ret) { 560 buf->length = al_size; 561 buf->type = MLX5_ALLOC_TYPE_ANON; 562 } 563 564 return ret; 565 } 566 567 void mlx5_free_buf(struct mlx5_buf *buf) 568 { 569 ibv_dofork_range(buf->buf, buf->length); 570 free(buf->buf); 571 } 572