/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <signal.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#include "mlx5.h"
#include "bitmap.h"

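/*
 * Small bitmap allocator used below to carve fixed-size chunks
 * (MLX5_Q_CHUNK_SIZE bytes each) out of huge-page backed shared memory
 * segments.  One bit per chunk: a set bit means the chunk is in use.
 */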
static int mlx5_bitmap_init(struct mlx5_bitmap *bitmap, uint32_t num,
                            uint32_t mask)
{
        bitmap->last = 0;
        bitmap->top = 0;
        bitmap->max = num;
        bitmap->avail = num;
        bitmap->mask = mask;
        /* One bit per chunk; size the table by its element type. */
        bitmap->table = calloc(BITS_TO_LONGS(bitmap->max),
                               sizeof(*bitmap->table));
        if (!bitmap->table)
                return -ENOMEM;

        return 0;
}

static void bitmap_free_range(struct mlx5_bitmap *bitmap, uint32_t obj,
                              int cnt)
{
        int i;

        obj &= bitmap->max - 1;

        for (i = 0; i < cnt; i++)
                mlx5_clear_bit(obj + i, bitmap->table);
        bitmap->last = min(bitmap->last, obj);
        bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
        bitmap->avail += cnt;
}

static int bitmap_empty(struct mlx5_bitmap *bitmap)
{
        return (bitmap->avail == bitmap->max) ? 1 : 0;
}

static int bitmap_avail(struct mlx5_bitmap *bitmap)
{
        return bitmap->avail;
}

static void mlx5_bitmap_cleanup(struct mlx5_bitmap *bitmap)
{
        if (bitmap->table)
                free(bitmap->table);
}

static void free_huge_mem(struct mlx5_hugetlb_mem *hmem)
{
        mlx5_bitmap_cleanup(&hmem->bitmap);
        if (shmdt(hmem->shmaddr) == -1)
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
        shmctl(hmem->shmid, IPC_RMID, NULL);
        free(hmem);
}

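/* Allocate a single chunk; returns its index, or -1 if the bitmap is full. */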
static int mlx5_bitmap_alloc(struct mlx5_bitmap *bitmap)
{
        uint32_t obj;
        int ret;

        obj = mlx5_find_first_zero_bit(bitmap->table, bitmap->max);
        if (obj < bitmap->max) {
                mlx5_set_bit(obj, bitmap->table);
                bitmap->last = (obj + 1);
                if (bitmap->last == bitmap->max)
                        bitmap->last = 0;
                obj |= bitmap->top;
                ret = obj;
        } else
                ret = -1;

        if (ret != -1)
                --bitmap->avail;

        return ret;
}

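/*
 * Search for a run of len clear bits whose first bit is a multiple of
 * alignment, scanning from start.  Returns the first bit of the run, or
 * (uint32_t)-1 if no such run exists below nbits.
 */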
static uint32_t find_aligned_range(unsigned long *bitmap,
                                   uint32_t start, uint32_t nbits,
                                   int len, int alignment)
{
        uint32_t end, i;

again:
        start = align(start, alignment);

        while ((start < nbits) && mlx5_test_bit(start, bitmap))
                start += alignment;

        if (start >= nbits)
                return -1;

        end = start + len;
        if (end > nbits)
                return -1;

        for (i = start + 1; i < end; i++) {
                if (mlx5_test_bit(i, bitmap)) {
                        start = i + 1;
                        goto again;
                }
        }

        return start;
}

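/*
 * Allocate cnt contiguous chunks whose first chunk index is a multiple of
 * align.  Returns the first chunk index, or -1 on failure.
 */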
static int bitmap_alloc_range(struct mlx5_bitmap *bitmap, int cnt,
                              int align)
{
        uint32_t obj;
        int ret, i;

        if (cnt == 1 && align == 1)
                return mlx5_bitmap_alloc(bitmap);

        if (cnt > bitmap->max)
                return -1;

        obj = find_aligned_range(bitmap->table, bitmap->last,
                                 bitmap->max, cnt, align);
        if (obj >= bitmap->max) {
                bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
                obj = find_aligned_range(bitmap->table, 0, bitmap->max,
                                         cnt, align);
        }

        if (obj < bitmap->max) {
                for (i = 0; i < cnt; i++)
                        mlx5_set_bit(obj + i, bitmap->table);
                if (obj == bitmap->last) {
                        bitmap->last = (obj + cnt);
                        if (bitmap->last >= bitmap->max)
                                bitmap->last = 0;
                }
                obj |= bitmap->top;
                ret = obj;
        } else
                ret = -1;

        if (ret != -1)
                bitmap->avail -= cnt;

        return ret;
}

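/*
 * Create a huge-page backed SysV shared memory segment large enough for
 * size bytes (rounded up to MLX5_SHM_LENGTH), attach it, and initialize
 * its chunk bitmap.  Returns NULL on any failure.
 */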
static struct mlx5_hugetlb_mem *alloc_huge_mem(size_t size)
{
        struct mlx5_hugetlb_mem *hmem;
        size_t shm_len;

        hmem = malloc(sizeof(*hmem));
        if (!hmem)
                return NULL;

        shm_len = align(size, MLX5_SHM_LENGTH);
        hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W);
        if (hmem->shmid == -1) {
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
                goto out_free;
        }

        hmem->shmaddr = shmat(hmem->shmid, MLX5_SHM_ADDR, MLX5_SHMAT_FLAGS);
        if (hmem->shmaddr == (void *)-1) {
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
                goto out_rmid;
        }

        if (mlx5_bitmap_init(&hmem->bitmap, shm_len / MLX5_Q_CHUNK_SIZE,
                             shm_len / MLX5_Q_CHUNK_SIZE - 1)) {
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
                goto out_shmdt;
        }

        /*
         * Mark the segment for destruction so it is removed automatically
         * once the last process detaches from it.
         */
        shmctl(hmem->shmid, IPC_RMID, NULL);

        return hmem;

out_shmdt:
        if (shmdt(hmem->shmaddr) == -1)
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));

out_rmid:
        shmctl(hmem->shmid, IPC_RMID, NULL);

out_free:
        free(hmem);
        return NULL;
}

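/*
 * Allocate buf from huge-page memory: first try to grab a chunk range from
 * an existing segment on the context's hugetlb_list, otherwise create a new
 * segment.  Segments that still have free chunks are kept at the head of
 * the list.
 */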
static int alloc_huge_buf(struct mlx5_context *mctx, struct mlx5_buf *buf,
                          size_t size, int page_size)
{
        int found = 0;
        int nchunk;
        struct mlx5_hugetlb_mem *hmem;
        int ret;

        buf->length = align(size, MLX5_Q_CHUNK_SIZE);
        nchunk = buf->length / MLX5_Q_CHUNK_SIZE;

        mlx5_spin_lock(&mctx->hugetlb_lock);
        TAILQ_FOREACH(hmem, &mctx->hugetlb_list, entry) {
                if (bitmap_avail(&hmem->bitmap)) {
                        buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
                        if (buf->base != -1) {
                                buf->hmem = hmem;
                                found = 1;
                                break;
                        }
                }
        }
        mlx5_spin_unlock(&mctx->hugetlb_lock);

        if (!found) {
                hmem = alloc_huge_mem(buf->length);
                if (!hmem)
                        return -1;

                buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
                if (buf->base == -1) {
                        free_huge_mem(hmem);
                        /* TBD: remove after proven stability */
                        fprintf(stderr, "BUG: huge allocation\n");
                        return -1;
                }

                buf->hmem = hmem;

                mlx5_spin_lock(&mctx->hugetlb_lock);
                if (bitmap_avail(&hmem->bitmap))
                        TAILQ_INSERT_HEAD(&mctx->hugetlb_list, hmem, entry);
                else
                        TAILQ_INSERT_TAIL(&mctx->hugetlb_list, hmem, entry);
                mlx5_spin_unlock(&mctx->hugetlb_lock);
        }

        buf->buf = hmem->shmaddr + buf->base * MLX5_Q_CHUNK_SIZE;

        ret = ibv_dontfork_range(buf->buf, buf->length);
        if (ret) {
                mlx5_dbg(stderr, MLX5_DBG_CONTIG, "\n");
                goto out_fork;
        }
        buf->type = MLX5_ALLOC_TYPE_HUGE;

        return 0;

out_fork:
        mlx5_spin_lock(&mctx->hugetlb_lock);
        bitmap_free_range(&hmem->bitmap, buf->base, nchunk);
        if (bitmap_empty(&hmem->bitmap)) {
                TAILQ_REMOVE(&mctx->hugetlb_list, hmem, entry);
                mlx5_spin_unlock(&mctx->hugetlb_lock);
                free_huge_mem(hmem);
        } else
                mlx5_spin_unlock(&mctx->hugetlb_lock);

        return -1;
}

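/* Return buf's chunks to its segment; free the segment once it is empty. */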
static void free_huge_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
        int nchunk;

        nchunk = buf->length / MLX5_Q_CHUNK_SIZE;
        mlx5_spin_lock(&ctx->hugetlb_lock);
        bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk);
        if (bitmap_empty(&buf->hmem->bitmap)) {
                TAILQ_REMOVE(&ctx->hugetlb_list, buf->hmem, entry);
                mlx5_spin_unlock(&ctx->hugetlb_lock);
                free_huge_mem(buf->hmem);
        } else
                mlx5_spin_unlock(&ctx->hugetlb_lock);
}

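/*
 * Allocate buf using the preferred allocation type, falling back in the
 * order huge -> contig -> default when the preferred method fails and the
 * type allows it.  A caller might use it roughly as follows (illustrative
 * sketch only; the "MLX_CQ" component string and cq_buf are hypothetical):
 *
 *      enum mlx5_alloc_type type;
 *
 *      mlx5_get_alloc_type("MLX_CQ", &type, MLX5_ALLOC_TYPE_ANON);
 *      if (mlx5_alloc_prefered_buf(ctx, &cq_buf, size, page_size,
 *                                  type, "MLX_CQ"))
 *              return -1;
 */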
int mlx5_alloc_prefered_buf(struct mlx5_context *mctx,
                            struct mlx5_buf *buf,
                            size_t size, int page_size,
                            enum mlx5_alloc_type type,
                            const char *component)
{
        int ret;

        /*
         * Fallback mechanism priority:
         *      huge pages
         *      contig pages
         *      default
         */
        if (type == MLX5_ALLOC_TYPE_HUGE ||
            type == MLX5_ALLOC_TYPE_PREFER_HUGE ||
            type == MLX5_ALLOC_TYPE_ALL) {
                ret = alloc_huge_buf(mctx, buf, size, page_size);
                if (!ret)
                        return 0;

                if (type == MLX5_ALLOC_TYPE_HUGE)
                        return -1;

                mlx5_dbg(stderr, MLX5_DBG_CONTIG,
                         "Huge mode allocation failed, fallback to %s mode\n",
                         type == MLX5_ALLOC_TYPE_ALL ? "contig" : "default");
        }

        if (type == MLX5_ALLOC_TYPE_CONTIG ||
            type == MLX5_ALLOC_TYPE_PREFER_CONTIG ||
            type == MLX5_ALLOC_TYPE_ALL) {
                ret = mlx5_alloc_buf_contig(mctx, buf, size, page_size, component);
                if (!ret)
                        return 0;

                if (type == MLX5_ALLOC_TYPE_CONTIG)
                        return -1;
                mlx5_dbg(stderr, MLX5_DBG_CONTIG,
                         "Contig allocation failed, fallback to default mode\n");
        }

        return mlx5_alloc_buf(buf, size, page_size);
}

int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
        int err = 0;

        switch (buf->type) {
        case MLX5_ALLOC_TYPE_ANON:
                mlx5_free_buf(buf);
                break;

        case MLX5_ALLOC_TYPE_HUGE:
                free_huge_buf(ctx, buf);
                break;

        case MLX5_ALLOC_TYPE_CONTIG:
                mlx5_free_buf_contig(ctx, buf);
                break;
        default:
                fprintf(stderr, "Bad allocation type\n");
        }

        return err;
}

/*
 * This function computes log2(v) rounded up.
 * We don't want a dependency on libm, which exposes the ceil and log2 APIs.
 * Code was written based on public domain code:
 * URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog.
 */
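/* Example: mlx5_get_block_order(5120) == 13, since 2^12 < 5120 <= 2^13. */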
static uint32_t mlx5_get_block_order(uint32_t v)
{
        static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
        static const uint32_t shift_arr[] = {1, 2, 4, 8, 16};
        int i;
        uint32_t input_val = v;

        register uint32_t r = 0; /* result of log2(v) will go here */
        for (i = 4; i >= 0; i--) {
                if (v & bits_arr[i]) {
                        v >>= shift_arr[i];
                        r |= shift_arr[i];
                }
        }
        /* Rounding up if required */
        r += !!(input_val & ((1 << r) - 1));

        return r;
}

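/*
 * Select the allocation type for a component.  The default can be
 * overridden through the <component>_ALLOC_TYPE environment variable
 * (ANON, HUGE, CONTIG, PREFER_CONTIG, PREFER_HUGE or ALL).
 */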
void mlx5_get_alloc_type(const char *component,
                         enum mlx5_alloc_type *alloc_type,
                         enum mlx5_alloc_type default_type)
{
        char *env_value;
        char name[128];

        snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component);

        *alloc_type = default_type;

        env_value = getenv(name);
        if (env_value) {
                if (!strcasecmp(env_value, "ANON"))
                        *alloc_type = MLX5_ALLOC_TYPE_ANON;
                else if (!strcasecmp(env_value, "HUGE"))
                        *alloc_type = MLX5_ALLOC_TYPE_HUGE;
                else if (!strcasecmp(env_value, "CONTIG"))
                        *alloc_type = MLX5_ALLOC_TYPE_CONTIG;
                else if (!strcasecmp(env_value, "PREFER_CONTIG"))
                        *alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG;
                else if (!strcasecmp(env_value, "PREFER_HUGE"))
                        *alloc_type = MLX5_ALLOC_TYPE_PREFER_HUGE;
                else if (!strcasecmp(env_value, "ALL"))
                        *alloc_type = MLX5_ALLOC_TYPE_ALL;
        }
}

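/*
 * Read the per-component limits for contiguous allocations.  The defaults
 * can be tuned through <component>_MAX_LOG2_CONTIG_BSIZE and
 * <component>_MIN_LOG2_CONTIG_BSIZE; out-of-range values are rejected.
 */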
static void mlx5_alloc_get_env_info(int *max_block_log,
                                    int *min_block_log,
                                    const char *component)
{
        char *env;
        int value;
        char name[128];

        /* First set defaults */
        *max_block_log = MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE;
        *min_block_log = MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE;

        snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component);
        env = getenv(name);
        if (env) {
                value = atoi(env);
                if (value <= MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE &&
                    value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE)
                        *max_block_log = value;
                else
                        fprintf(stderr, "Invalid value %d for %s\n",
                                value, name);
        }

        snprintf(name, sizeof(name), "%s_MIN_LOG2_CONTIG_BSIZE", component);
        env = getenv(name);
        if (env) {
                value = atoi(env);
                if (value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE &&
                    value <= *max_block_log)
                        *min_block_log = value;
                else
                        fprintf(stderr, "Invalid value %d for %s\n",
                                value, name);
        }
}

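/*
 * Allocate a physically contiguous buffer by asking the kernel driver for
 * contiguous pages: the command and the requested block order are encoded
 * into the mmap offset (set_command()/set_order()) and mapped through the
 * device's command file descriptor.  On failure the order is reduced down
 * to the configured minimum before giving up.
 */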
int mlx5_alloc_buf_contig(struct mlx5_context *mctx,
                          struct mlx5_buf *buf, size_t size,
                          int page_size,
                          const char *component)
{
        void *addr = MAP_FAILED;
        int block_size_exp;
        int max_block_log;
        int min_block_log;
        struct ibv_context *context = &mctx->ibv_ctx;
        off_t offset;

        mlx5_alloc_get_env_info(&max_block_log,
                                &min_block_log,
                                component);

        block_size_exp = mlx5_get_block_order(size);

        if (block_size_exp > max_block_log)
                block_size_exp = max_block_log;

        do {
                offset = 0;
                set_command(MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD, &offset);
                set_order(block_size_exp, &offset);
                addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED,
                            context->cmd_fd, page_size * offset);
                if (addr != MAP_FAILED)
                        break;

                /*
                 * The kernel returns EINVAL if not supported
                 */
                if (errno == EINVAL)
                        return -1;

                block_size_exp -= 1;
        } while (block_size_exp >= min_block_log);
        mlx5_dbg(mctx->dbg_fp, MLX5_DBG_CONTIG, "block order %d, addr %p\n",
                 block_size_exp, addr);

        if (addr == MAP_FAILED)
                return -1;

        if (ibv_dontfork_range(addr, size)) {
                munmap(addr, size);
                return -1;
        }

        buf->buf = addr;
        buf->length = size;
        buf->type = MLX5_ALLOC_TYPE_CONTIG;

        return 0;
}

void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf)
{
        ibv_dofork_range(buf->buf, buf->length);
        munmap(buf->buf, buf->length);
}

int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size)
{
        int ret;
        int al_size;

        al_size = align(size, page_size);
        ret = posix_memalign(&buf->buf, page_size, al_size);
        if (ret)
                return ret;

        ret = ibv_dontfork_range(buf->buf, al_size);
        if (ret)
                free(buf->buf);

        if (!ret) {
                buf->length = al_size;
                buf->type = MLX5_ALLOC_TYPE_ANON;
        }

        return ret;
}

void mlx5_free_buf(struct mlx5_buf *buf)
{
        ibv_dofork_range(buf->buf, buf->length);
        free(buf->buf);
}