xref: /freebsd/contrib/ofed/libmlx5/buf.c (revision 350b7c3570aa6c87c537e54f706f1866f93a4142)
1 /*
2  * Copyright (c) 2012 Mellanox Technologies, Inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <config.h>
34 
35 #include <signal.h>
36 #include <sys/ipc.h>
37 #include <sys/shm.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <errno.h>
41 
42 #include "mlx5.h"
43 #include "bitmap.h"
44 
45 static int mlx5_bitmap_init(struct mlx5_bitmap *bitmap, uint32_t num,
46 			    uint32_t mask)
47 {
48 	bitmap->last = 0;
49 	bitmap->top  = 0;
50 	bitmap->max  = num;
51 	bitmap->avail = num;
52 	bitmap->mask = mask;
53 	bitmap->avail = bitmap->max;
54 	bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(uint32_t));
55 	if (!bitmap->table)
56 		return -ENOMEM;
57 
58 	return 0;
59 }
60 
61 static void bitmap_free_range(struct mlx5_bitmap *bitmap, uint32_t obj,
62 			      int cnt)
63 {
64 	int i;
65 
66 	obj &= bitmap->max - 1;
67 
68 	for (i = 0; i < cnt; i++)
69 		mlx5_clear_bit(obj + i, bitmap->table);
70 	bitmap->last = min(bitmap->last, obj);
71 	bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
72 	bitmap->avail += cnt;
73 }
74 
75 static int bitmap_empty(struct mlx5_bitmap *bitmap)
76 {
77 	return (bitmap->avail == bitmap->max) ? 1 : 0;
78 }
79 
80 static int bitmap_avail(struct mlx5_bitmap *bitmap)
81 {
82 	return bitmap->avail;
83 }
84 
85 static void mlx5_bitmap_cleanup(struct mlx5_bitmap *bitmap)
86 {
87 	if (bitmap->table)
88 		free(bitmap->table);
89 }
90 
91 static void free_huge_mem(struct mlx5_hugetlb_mem *hmem)
92 {
93 	mlx5_bitmap_cleanup(&hmem->bitmap);
94 	if (shmdt(hmem->shmaddr) == -1)
95 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
96 	shmctl(hmem->shmid, IPC_RMID, NULL);
97 	free(hmem);
98 }
99 
100 static int mlx5_bitmap_alloc(struct mlx5_bitmap *bitmap)
101 {
102 	uint32_t obj;
103 	int ret;
104 
105 	obj = mlx5_find_first_zero_bit(bitmap->table, bitmap->max);
106 	if (obj < bitmap->max) {
107 		mlx5_set_bit(obj, bitmap->table);
108 		bitmap->last = (obj + 1);
109 		if (bitmap->last == bitmap->max)
110 			bitmap->last = 0;
111 		obj |= bitmap->top;
112 		ret = obj;
113 	} else
114 		ret = -1;
115 
116 	if (ret != -1)
117 		--bitmap->avail;
118 
119 	return ret;
120 }
121 
/*
 * Search 'bitmap' (holding 'nbits' bits) for 'len' consecutive clear bits
 * whose first index is aligned to 'alignment', starting the search at
 * 'start'.
 *
 * Returns the index of the first bit of the range, or (uint32_t)-1 when
 * no candidate start exists below 'nbits' or the range would run past
 * 'nbits'.
 */
static uint32_t find_aligned_range(unsigned long *bitmap,
				   uint32_t start, uint32_t nbits,
				   int len, int alignment)
{
	uint32_t stop, pos;

	for (;;) {
		start = align(start, alignment);

		/* Step over aligned positions that are already taken. */
		while ((start < nbits) && mlx5_test_bit(start, bitmap))
			start += alignment;

		if (start >= nbits)
			return -1;

		stop = start + len;
		if (stop > nbits)
			return -1;

		/* The first bit is known clear; check the rest of the run. */
		for (pos = start + 1; pos < stop; pos++) {
			if (mlx5_test_bit(pos, bitmap))
				break;
		}

		if (pos >= stop)
			return start;

		/* A used bit interrupts the run: resume just past it. */
		start = pos + 1;
	}
}
150 
151 static int bitmap_alloc_range(struct mlx5_bitmap *bitmap, int cnt,
152 			      int align)
153 {
154 	uint32_t obj;
155 	int ret, i;
156 
157 	if (cnt == 1 && align == 1)
158 		return mlx5_bitmap_alloc(bitmap);
159 
160 	if (cnt > bitmap->max)
161 		return -1;
162 
163 	obj = find_aligned_range(bitmap->table, bitmap->last,
164 				 bitmap->max, cnt, align);
165 	if (obj >= bitmap->max) {
166 		bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
167 		obj = find_aligned_range(bitmap->table, 0, bitmap->max,
168 					 cnt, align);
169 	}
170 
171 	if (obj < bitmap->max) {
172 		for (i = 0; i < cnt; i++)
173 			mlx5_set_bit(obj + i, bitmap->table);
174 		if (obj == bitmap->last) {
175 			bitmap->last = (obj + cnt);
176 			if (bitmap->last >= bitmap->max)
177 				bitmap->last = 0;
178 		}
179 		obj |= bitmap->top;
180 		ret = obj;
181 	} else
182 		ret = -1;
183 
184 	if (ret != -1)
185 		bitmap->avail -= cnt;
186 
187 	return obj;
188 }
189 
190 static struct mlx5_hugetlb_mem *alloc_huge_mem(size_t size)
191 {
192 	struct mlx5_hugetlb_mem *hmem;
193 	size_t shm_len;
194 
195 	hmem = malloc(sizeof(*hmem));
196 	if (!hmem)
197 		return NULL;
198 
199 	shm_len = align(size, MLX5_SHM_LENGTH);
200 	hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W);
201 	if (hmem->shmid == -1) {
202 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
203 		goto out_free;
204 	}
205 
206 	hmem->shmaddr = shmat(hmem->shmid, MLX5_SHM_ADDR, MLX5_SHMAT_FLAGS);
207 	if (hmem->shmaddr == (void *)-1) {
208 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
209 		goto out_rmid;
210 	}
211 
212 	if (mlx5_bitmap_init(&hmem->bitmap, shm_len / MLX5_Q_CHUNK_SIZE,
213 			     shm_len / MLX5_Q_CHUNK_SIZE - 1)) {
214 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
215 		goto out_shmdt;
216 	}
217 
218 	/*
219 	 * Marked to be destroyed when process detaches from shmget segment
220 	 */
221 	shmctl(hmem->shmid, IPC_RMID, NULL);
222 
223 	return hmem;
224 
225 out_shmdt:
226 	if (shmdt(hmem->shmaddr) == -1)
227 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno));
228 
229 out_rmid:
230 	shmctl(hmem->shmid, IPC_RMID, NULL);
231 
232 out_free:
233 	free(hmem);
234 	return NULL;
235 }
236 
237 static int alloc_huge_buf(struct mlx5_context *mctx, struct mlx5_buf *buf,
238 			  size_t size, int page_size)
239 {
240 	int found = 0;
241 	int nchunk;
242 	struct mlx5_hugetlb_mem *hmem;
243 	int ret;
244 
245 	buf->length = align(size, MLX5_Q_CHUNK_SIZE);
246 	nchunk = buf->length / MLX5_Q_CHUNK_SIZE;
247 
248 	mlx5_spin_lock(&mctx->hugetlb_lock);
249 	TAILQ_FOREACH(hmem, &mctx->hugetlb_list, entry) {
250 		if (bitmap_avail(&hmem->bitmap)) {
251 			buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
252 			if (buf->base != -1) {
253 				buf->hmem = hmem;
254 				found = 1;
255 				break;
256 			}
257 		}
258 	}
259 	mlx5_spin_unlock(&mctx->hugetlb_lock);
260 
261 	if (!found) {
262 		hmem = alloc_huge_mem(buf->length);
263 		if (!hmem)
264 			return -1;
265 
266 		buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
267 		if (buf->base == -1) {
268 			free_huge_mem(hmem);
269 			/* TBD: remove after proven stability */
270 			fprintf(stderr, "BUG: huge allocation\n");
271 			return -1;
272 		}
273 
274 		buf->hmem = hmem;
275 
276 		mlx5_spin_lock(&mctx->hugetlb_lock);
277 		if (bitmap_avail(&hmem->bitmap))
278 			TAILQ_INSERT_HEAD(&mctx->hugetlb_list, hmem, entry);
279 		else
280 			TAILQ_INSERT_TAIL(&mctx->hugetlb_list, hmem, entry);
281 		mlx5_spin_unlock(&mctx->hugetlb_lock);
282 	}
283 
284 	buf->buf = hmem->shmaddr + buf->base * MLX5_Q_CHUNK_SIZE;
285 
286 	ret = ibv_dontfork_range(buf->buf, buf->length);
287 	if (ret) {
288 		mlx5_dbg(stderr, MLX5_DBG_CONTIG, "\n");
289 		goto out_fork;
290 	}
291 	buf->type = MLX5_ALLOC_TYPE_HUGE;
292 
293 	return 0;
294 
295 out_fork:
296 	mlx5_spin_lock(&mctx->hugetlb_lock);
297 	bitmap_free_range(&hmem->bitmap, buf->base, nchunk);
298 	if (bitmap_empty(&hmem->bitmap)) {
299 		TAILQ_REMOVE(&mctx->hugetlb_list, hmem, entry);
300 		mlx5_spin_unlock(&mctx->hugetlb_lock);
301 		free_huge_mem(hmem);
302 	} else
303 		mlx5_spin_unlock(&mctx->hugetlb_lock);
304 
305 	return -1;
306 }
307 
308 static void free_huge_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
309 {
310 	int nchunk;
311 
312 	nchunk = buf->length / MLX5_Q_CHUNK_SIZE;
313 	mlx5_spin_lock(&ctx->hugetlb_lock);
314 	bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk);
315 	if (bitmap_empty(&buf->hmem->bitmap)) {
316 		TAILQ_REMOVE(&ctx->hugetlb_list, buf->hmem, entry);
317 		mlx5_spin_unlock(&ctx->hugetlb_lock);
318 		free_huge_mem(buf->hmem);
319 	} else
320 		mlx5_spin_unlock(&ctx->hugetlb_lock);
321 }
322 
323 int mlx5_alloc_prefered_buf(struct mlx5_context *mctx,
324 			    struct mlx5_buf *buf,
325 			    size_t size, int page_size,
326 			    enum mlx5_alloc_type type,
327 			    const char *component)
328 {
329 	int ret;
330 
331 	/*
332 	 * Fallback mechanism priority:
333 	 *	huge pages
334 	 *	contig pages
335 	 *	default
336 	 */
337 	if (type == MLX5_ALLOC_TYPE_HUGE ||
338 	    type == MLX5_ALLOC_TYPE_PREFER_HUGE ||
339 	    type == MLX5_ALLOC_TYPE_ALL) {
340 		ret = alloc_huge_buf(mctx, buf, size, page_size);
341 		if (!ret)
342 			return 0;
343 
344 		if (type == MLX5_ALLOC_TYPE_HUGE)
345 			return -1;
346 
347 		mlx5_dbg(stderr, MLX5_DBG_CONTIG,
348 			 "Huge mode allocation failed, fallback to %s mode\n",
349 			 MLX5_ALLOC_TYPE_ALL ? "contig" : "default");
350 	}
351 
352 	if (type == MLX5_ALLOC_TYPE_CONTIG ||
353 	    type == MLX5_ALLOC_TYPE_PREFER_CONTIG ||
354 	    type == MLX5_ALLOC_TYPE_ALL) {
355 		ret = mlx5_alloc_buf_contig(mctx, buf, size, page_size, component);
356 		if (!ret)
357 			return 0;
358 
359 		if (type == MLX5_ALLOC_TYPE_CONTIG)
360 			return -1;
361 		mlx5_dbg(stderr, MLX5_DBG_CONTIG,
362 			 "Contig allocation failed, fallback to default mode\n");
363 	}
364 
365 	return mlx5_alloc_buf(buf, size, page_size);
366 
367 }
368 
369 int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
370 {
371 	int err = 0;
372 
373 	switch (buf->type) {
374 	case MLX5_ALLOC_TYPE_ANON:
375 		mlx5_free_buf(buf);
376 		break;
377 
378 	case MLX5_ALLOC_TYPE_HUGE:
379 		free_huge_buf(ctx, buf);
380 		break;
381 
382 	case MLX5_ALLOC_TYPE_CONTIG:
383 		mlx5_free_buf_contig(ctx, buf);
384 		break;
385 	default:
386 		fprintf(stderr, "Bad allocation type\n");
387 	}
388 
389 	return err;
390 }
391 
/*
 * Compute log2(v) rounded up to the next integer.
 *
 * libm (ceil/log2) is deliberately not used so that no dependency on it
 * is introduced.  The integer log2 part is based on public domain code:
 *	URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog.
 *
 * Returns 0 for v == 0 and v == 1; for any other v, the smallest r such
 * that 2^r >= v (so 32 for values above 0x80000000).
 */
static uint32_t mlx5_get_block_order(uint32_t v)
{
	static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
	static const uint32_t shift_arr[] = {1, 2, 4, 8, 16};
	uint32_t input_val = v;
	uint32_t r = 0;	/* floor(log2(v)) is accumulated here */
	int i;

	/* Binary search for the highest set bit, widest mask first. */
	for (i = 4; i >= 0; i--) {
		if (v & bits_arr[i]) {
			v >>= shift_arr[i];
			r |= shift_arr[i];
		}
	}

	/*
	 * Round up when any bit below the highest one is set.  r can be as
	 * large as 31, so the shift must be done on an unsigned 32-bit
	 * value: shifting a signed 1 into the sign bit is undefined.
	 */
	r += !!(input_val & (((uint32_t)1 << r) - 1));

	return r;
}
416 
417 void mlx5_get_alloc_type(const char *component,
418 			 enum mlx5_alloc_type *alloc_type,
419 			 enum mlx5_alloc_type default_type)
420 
421 {
422 	char *env_value;
423 	char name[128];
424 
425 	snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component);
426 
427 	*alloc_type = default_type;
428 
429 	env_value = getenv(name);
430 	if (env_value) {
431 		if (!strcasecmp(env_value, "ANON"))
432 			*alloc_type = MLX5_ALLOC_TYPE_ANON;
433 		else if (!strcasecmp(env_value, "HUGE"))
434 			*alloc_type = MLX5_ALLOC_TYPE_HUGE;
435 		else if (!strcasecmp(env_value, "CONTIG"))
436 			*alloc_type = MLX5_ALLOC_TYPE_CONTIG;
437 		else if (!strcasecmp(env_value, "PREFER_CONTIG"))
438 			*alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG;
439 		else if (!strcasecmp(env_value, "PREFER_HUGE"))
440 			*alloc_type = MLX5_ALLOC_TYPE_PREFER_HUGE;
441 		else if (!strcasecmp(env_value, "ALL"))
442 			*alloc_type = MLX5_ALLOC_TYPE_ALL;
443 	}
444 }
445 
446 static void mlx5_alloc_get_env_info(int *max_block_log,
447 				    int *min_block_log,
448 				    const char *component)
449 
450 {
451 	char *env;
452 	int value;
453 	char name[128];
454 
455 	/* First set defaults */
456 	*max_block_log = MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE;
457 	*min_block_log = MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE;
458 
459 	snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component);
460 	env = getenv(name);
461 	if (env) {
462 		value = atoi(env);
463 		if (value <= MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE &&
464 		    value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE)
465 			*max_block_log = value;
466 		else
467 			fprintf(stderr, "Invalid value %d for %s\n",
468 				value, name);
469 	}
470 	sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component);
471 	env = getenv(name);
472 	if (env) {
473 		value = atoi(env);
474 		if (value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE &&
475 		    value  <=  *max_block_log)
476 			*min_block_log = value;
477 		else
478 			fprintf(stderr, "Invalid value %d for %s\n",
479 				value, name);
480 	}
481 }
482 
483 int mlx5_alloc_buf_contig(struct mlx5_context *mctx,
484 			  struct mlx5_buf *buf, size_t size,
485 			  int page_size,
486 			  const char *component)
487 {
488 	void *addr = MAP_FAILED;
489 	int block_size_exp;
490 	int max_block_log;
491 	int min_block_log;
492 	struct ibv_context *context = &mctx->ibv_ctx;
493 	off_t offset;
494 
495 	mlx5_alloc_get_env_info(&max_block_log,
496 				&min_block_log,
497 				component);
498 
499 	block_size_exp = mlx5_get_block_order(size);
500 
501 	if (block_size_exp > max_block_log)
502 		block_size_exp = max_block_log;
503 
504 	do {
505 		offset = 0;
506 		set_command(MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD, &offset);
507 		set_order(block_size_exp, &offset);
508 		addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED,
509 			    context->cmd_fd, page_size * offset);
510 		if (addr != MAP_FAILED)
511 			break;
512 
513 		/*
514 		 *  The kernel returns EINVAL if not supported
515 		 */
516 		if (errno == EINVAL)
517 			return -1;
518 
519 		block_size_exp -= 1;
520 	} while (block_size_exp >= min_block_log);
521 	mlx5_dbg(mctx->dbg_fp, MLX5_DBG_CONTIG, "block order %d, addr %p\n",
522 		 block_size_exp, addr);
523 
524 	if (addr == MAP_FAILED)
525 		return -1;
526 
527 	if (ibv_dontfork_range(addr, size)) {
528 		munmap(addr, size);
529 		return -1;
530 	}
531 
532 	buf->buf = addr;
533 	buf->length = size;
534 	buf->type = MLX5_ALLOC_TYPE_CONTIG;
535 
536 	return 0;
537 }
538 
539 void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf)
540 {
541 	ibv_dofork_range(buf->buf, buf->length);
542 	munmap(buf->buf, buf->length);
543 }
544 
545 int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size)
546 {
547 	int ret;
548 	int al_size;
549 
550 	al_size = align(size, page_size);
551 	ret = posix_memalign(&buf->buf, page_size, al_size);
552 	if (ret)
553 		return ret;
554 
555 	ret = ibv_dontfork_range(buf->buf, al_size);
556 	if (ret)
557 		free(buf->buf);
558 
559 	if (!ret) {
560 		buf->length = al_size;
561 		buf->type = MLX5_ALLOC_TYPE_ANON;
562 	}
563 
564 	return ret;
565 }
566 
567 void mlx5_free_buf(struct mlx5_buf *buf)
568 {
569 	ibv_dofork_range(buf->buf, buf->length);
570 	free(buf->buf);
571 }
572