xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c (revision f9e3bd43d55f24331e5ea65f667dbb33716e7d6b)
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/highmem.h>
34 #include <linux/kernel.h>
35 #include <linux/delay.h>
36 #include <linux/mlx5/driver.h>
37 #include <linux/xarray.h>
38 #include "mlx5_core.h"
39 #include "lib/eq.h"
40 #include "lib/tout.h"
41 
/* op_mod values for the MANAGE_PAGES command. */
enum {
	MLX5_PAGES_CANT_GIVE	= 0,	/* notify FW the driver cannot supply pages */
	MLX5_PAGES_GIVE		= 1,	/* hand pages over to FW */
	MLX5_PAGES_TAKE		= 2	/* reclaim pages back from FW */
};
47 
/* Deferred page-request work item: built by req_pages_handler() from a
 * PAGE_REQUEST EQE and executed by pages_work_handler() in process
 * context.
 */
struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;	/* function the request applies to */
	u8	ec_function;	/* EC-function flag decoded from the EQE */
	s32	npages;		/* >0: give pages to FW, <0: reclaim from FW */
	struct work_struct work;
	u8	release_all;	/* free all tracked pages, no FW command */
};
56 
/* Tracks one DMA-mapped system page handed to firmware, carved into
 * MLX5_NUM_4K_IN_PAGE chunks of MLX5_ADAPTER_PAGE_SIZE each.
 */
struct fw_page {
	struct rb_node		rb_node;	/* node in the per-function tree, keyed by addr */
	u64			addr;		/* DMA address of the page */
	struct page	       *page;		/* backing system page */
	u32			function;	/* packed owner key, see get_function() */
	unsigned long		bitmask;	/* set bit = that 4K chunk is free */
	struct list_head	list;		/* on dev->priv.free_list while free_count > 0 */
	unsigned int free_count;		/* number of free 4K chunks left */
};
66 
enum {
	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,	/* upper bound for one reclaim wait */
	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,	/* 4K chunks per system page */
};
71 
72 static u32 get_function(u16 func_id, bool ec_function)
73 {
74 	return (u32)func_id | (ec_function << 16);
75 }
76 
77 static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function)
78 {
79 	if (!func_id)
80 		return mlx5_core_is_ecpf(dev) && !ec_function ?
81 			MLX5_HOST_PF : MLX5_SELF;
82 
83 	if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) {
84 		if (ec_function)
85 			return MLX5_EC_VF;
86 		else
87 			return MLX5_VF;
88 	}
89 	return MLX5_SF;
90 }
91 
/* Unpack the EC-function flag from a get_function() key. */
static u32 mlx5_get_ec_function(u32 function)
{
	return function >> 16;
}
96 
/* Unpack the 16-bit function id from a get_function() key. */
static u32 mlx5_get_func_id(u32 function)
{
	return function & 0xffff;
}
101 
/* Return the rb-tree root tracking fw pages for @function, allocating
 * it and publishing it in dev->priv.page_root_xa on first use.
 *
 * Returns an ERR_PTR on allocation or xa_insert() failure.
 */
static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)
{
	struct rb_root *root;
	int err;

	root = xa_load(&dev->priv.page_root_xa, function);
	if (root)
		return root;

	root = kzalloc_obj(*root);
	if (!root)
		return ERR_PTR(-ENOMEM);

	err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL);
	if (err) {
		/* entry not published; nothing else can reference it */
		kfree(root);
		return ERR_PTR(err);
	}

	/* kzalloc zeroed it already; make the empty-tree init explicit */
	*root = RB_ROOT;

	return root;
}
125 
/* Start tracking a newly DMA-mapped system page in the per-function
 * rb-tree and put it on the free list with all 4K chunks marked free.
 *
 * Returns 0, -EEXIST if @addr is already tracked, -ENOMEM on
 * allocation failure, or a page_root_per_function() error.
 */
static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function)
{
	struct rb_node *parent = NULL;
	struct rb_root *root;
	struct rb_node **new;
	struct fw_page *nfp;
	struct fw_page *tfp;
	int i;

	root = page_root_per_function(dev, function);
	if (IS_ERR(root))
		return PTR_ERR(root);

	new = &root->rb_node;

	/* NOTE(review): smaller keys descend LEFT here, the mirror image
	 * of the usual kernel rb-tree idiom; find_fw_page() walks the
	 * same way, so insert and lookup stay mutually consistent.
	 */
	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct fw_page, rb_node);
		if (tfp->addr < addr)
			new = &parent->rb_left;
		else if (tfp->addr > addr)
			new = &parent->rb_right;
		else
			return -EEXIST;	/* address already tracked */
	}

	nfp = kzalloc_obj(*nfp);
	if (!nfp)
		return -ENOMEM;

	nfp->addr = addr;
	nfp->page = page;
	nfp->function = function;
	nfp->free_count = MLX5_NUM_4K_IN_PAGE;
	/* every 4K chunk starts out free */
	for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
		set_bit(i, &nfp->bitmask);

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	list_add(&nfp->list, &dev->priv.free_list);

	return 0;
}
169 
170 static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr,
171 				    u32 function)
172 {
173 	struct fw_page *result = NULL;
174 	struct rb_root *root;
175 	struct rb_node *tmp;
176 	struct fw_page *tfp;
177 
178 	root = xa_load(&dev->priv.page_root_xa, function);
179 	if (WARN_ON_ONCE(!root))
180 		return NULL;
181 
182 	tmp = root->rb_node;
183 
184 	while (tmp) {
185 		tfp = rb_entry(tmp, struct fw_page, rb_node);
186 		if (tfp->addr < addr) {
187 			tmp = tmp->rb_left;
188 		} else if (tfp->addr > addr) {
189 			tmp = tmp->rb_right;
190 		} else {
191 			result = tfp;
192 			break;
193 		}
194 	}
195 
196 	return result;
197 }
198 
199 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
200 				s32 *npages, int boot)
201 {
202 	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {};
203 	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {};
204 	int err;
205 
206 	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
207 	MLX5_SET(query_pages_in, in, op_mod, boot ?
208 		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
209 		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
210 	MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
211 
212 	err = mlx5_cmd_exec_inout(dev, query_pages, in, out);
213 	if (err)
214 		return err;
215 
216 	*npages = MLX5_GET(query_pages_out, out, num_pages);
217 	*func_id = MLX5_GET(query_pages_out, out, function_id);
218 
219 	return err;
220 }
221 
222 static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function)
223 {
224 	struct fw_page *fp = NULL;
225 	struct fw_page *iter;
226 	unsigned n;
227 
228 	list_for_each_entry(iter, &dev->priv.free_list, list) {
229 		if (iter->function != function)
230 			continue;
231 		fp = iter;
232 	}
233 
234 	if (list_empty(&dev->priv.free_list) || !fp)
235 		return -ENOMEM;
236 
237 	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
238 	if (n >= MLX5_NUM_4K_IN_PAGE) {
239 		mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n",
240 			       fp->addr, n, fp->bitmask,  MLX5_NUM_4K_IN_PAGE);
241 		return -ENOENT;
242 	}
243 	clear_bit(n, &fp->bitmask);
244 	fp->free_count--;
245 	if (!fp->free_count)
246 		list_del(&fp->list);
247 
248 	*addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
249 
250 	return 0;
251 }
252 
/* Mask selecting the system-page-aligned base of a 4K chunk address. */
#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
254 
/* Drop all tracking for @fwp, unmap its DMA mapping and release the
 * backing system page. @in_free_list tells whether the page is linked
 * on dev->priv.free_list and must be unlinked first.
 */
static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
		     bool in_free_list)
{
	struct rb_root *root;

	root = xa_load(&dev->priv.page_root_xa, fwp->function);
	if (WARN_ON_ONCE(!root))
		return;

	rb_erase(&fwp->rb_node, root);
	if (in_free_list)
		list_del(&fwp->list);
	/* mask off the chunk offset: the mapping covers the whole page */
	dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK,
		       PAGE_SIZE, DMA_BIDIRECTIONAL);
	__free_page(fwp->page);
	kfree(fwp);
}
272 
/* Return the 4K chunk at @addr to its tracking fw_page. When the whole
 * system page becomes free it is unmapped and released; when the page
 * gains its first free chunk it joins the free list.
 */
static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function)
{
	struct fw_page *fwp;
	int n;

	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function);
	if (!fwp) {
		mlx5_core_warn_rl(dev, "page not found\n");
		return;
	}
	/* chunk index within the system page */
	n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
	fwp->free_count++;
	set_bit(n, &fwp->bitmask);
	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE)
		/* free_count != 1 here means the page already sat on the
		 * free list before this chunk came back (only false for a
		 * page that holds a single 4K chunk)
		 */
		free_fwp(dev, fwp, fwp->free_count != 1);
	else if (fwp->free_count == 1)
		list_add(&fwp->list, &dev->priv.free_list);
}
291 
/* Allocate one system page on the device's NUMA node, DMA-map it and
 * start tracking it for @function. If the mapping lands at DMA address
 * 0 (which FW cannot use), the zero mapping is held and the page is
 * mapped again, guaranteeing a non-zero address on the retry.
 */
static int alloc_system_page(struct mlx5_core_dev *dev, u32 function)
{
	struct device *device = mlx5_core_dma_dev(dev);
	int nid = dev->priv.numa_node;
	struct page *page;
	u64 zero_addr = 1;	/* any non-zero value = no zero mapping held */
	u64 addr;
	int err;

	page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
	if (!page) {
		mlx5_core_warn(dev, "failed to allocate page\n");
		return -ENOMEM;
	}
map:
	addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(device, addr)) {
		mlx5_core_warn(dev, "failed dma mapping page\n");
		err = -ENOMEM;
		goto err_mapping;
	}

	/* Firmware doesn't support page with physical address 0 */
	if (addr == 0) {
		/* keep the zero mapping alive so the retry cannot get 0 */
		zero_addr = addr;
		goto map;
	}

	err = insert_page(dev, addr, page, function);
	if (err) {
		mlx5_core_err(dev, "failed to track allocated page\n");
		dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
	}

err_mapping:
	if (err)
		__free_page(page);

	/* release the temporary zero-address mapping, if one was taken */
	if (zero_addr == 0)
		dma_unmap_page(device, zero_addr, PAGE_SIZE,
			       DMA_BIDIRECTIONAL);

	return err;
}
336 
337 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
338 			     bool ec_function)
339 {
340 	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
341 	int err;
342 
343 	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
344 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
345 	MLX5_SET(manage_pages_in, in, function_id, func_id);
346 	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
347 
348 	err = mlx5_cmd_exec_in(dev, manage_pages, in);
349 	if (err)
350 		mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
351 			       func_id, err);
352 }
353 
/* Allocate @npages 4K chunks for @func_id and hand them to FW via
 * MANAGE_PAGES(op_mod GIVE). @event is non-zero when driven by a FW
 * page-request event; in that case an FW-side failure (-EREMOTEIO) is
 * ignored, and on a driver-side failure FW is notified with
 * MLX5_PAGES_CANT_GIVE. On success the per-type and total fw page
 * counters are bumped by @npages.
 */
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int event, bool ec_function)
{
	u32 function = get_function(func_id, ec_function);
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
	int notify_fail = event;
	u16 func_type;
	u64 addr;
	int err;
	u32 *in;
	int i;

	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		goto out_free;
	}

	for (i = 0; i < npages; i++) {
retry:
		err = alloc_4k(dev, &addr, function);
		if (err) {
			/* no free 4K chunk for this function: map a fresh
			 * system page, then retry the carve
			 */
			if (err == -ENOMEM)
				err = alloc_system_page(dev, function);
			if (err) {
				dev->priv.fw_pages_alloc_failed += (npages - i);
				goto out_4k;
			}

			goto retry;
		}
		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
	}

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);

	err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out));
	if (err == -EREMOTEIO) {
		/* FW saw the command; don't send CANT_GIVE on top of it */
		notify_fail = 0;
		/* if triggered by FW and failed by FW ignore */
		if (event) {
			err = 0;
			goto out_dropped;
		}
	}
	err = mlx5_cmd_check(dev, err, in, out);
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_dropped;
	}

	func_type = func_id_to_type(dev, func_id, ec_function);
	dev->priv.page_counters[func_type] += npages;
	dev->priv.fw_pages += npages;

	mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
		      npages, ec_function, func_id, err);

	kvfree(in);
	return 0;

out_dropped:
	dev->priv.give_pages_dropped += npages;
out_4k:
	/* hand back every chunk carved for this failed command */
	for (i--; i >= 0; i--)
		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function);
out_free:
	kvfree(in);
	if (notify_fail)
		page_notify_fail(dev, func_id, ec_function);
	return err;
}
434 
/* Free every page tracked for (@func_id, @ec_function) without issuing
 * any FW command - used when FW requested RELEASE_ALL. Page counters
 * are adjusted by the number of 4K chunks that were actually held out.
 */
static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id,
			      bool ec_function)
{
	u32 function = get_function(func_id, ec_function);
	struct rb_root *root;
	struct rb_node *p;
	int npages = 0;
	u16 func_type;

	root = xa_load(&dev->priv.page_root_xa, function);
	if (WARN_ON_ONCE(!root))
		return;

	p = rb_first(root);
	while (p) {
		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);

		/* advance before free_fwp() erases the current node */
		p = rb_next(p);
		npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
		/* non-zero free_count means the page is on the free list */
		free_fwp(dev, fwp, fwp->free_count);
	}

	func_type = func_id_to_type(dev, func_id, ec_function);
	dev->priv.page_counters[func_type] -= npages;
	dev->priv.fw_pages -= npages;

	mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n",
		      npages, ec_function, func_id);
}
464 
465 static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
466 				     u32 npages)
467 {
468 	u32 pages_set = 0;
469 	unsigned int n;
470 
471 	for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
472 		MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
473 				 fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
474 		pages_set++;
475 
476 		if (!--npages)
477 			break;
478 	}
479 
480 	return pages_set;
481 }
482 
/* Issue a MANAGE_PAGES command. If it fails with -ENXIO
 * (NOTE(review): presumably the command interface / FW is gone -
 * confirm against mlx5_cmd_do() semantics), synthesize the reply from
 * our own tracking tree so the caller can still reclaim the pages.
 */
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
			     u32 *in, int in_size, u32 *out, int out_size)
{
	struct rb_root *root;
	struct fw_page *fwp;
	struct rb_node *p;
	bool ec_function;
	u32 func_id;
	u32 npages;
	u32 i = 0;
	int err;

	err = mlx5_cmd_do(dev, in, in_size, out, out_size);
	/* If FW is gone (-ENXIO), proceed to forceful reclaim */
	if (err != -ENXIO)
		return err;

	/* No hard feelings, we want our pages back! */
	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
	func_id = MLX5_GET(manage_pages_in, in, function_id);
	ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function);

	root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function));
	if (WARN_ON_ONCE(!root))
		return -EEXIST;

	/* fill the output with up to npages addresses we believe FW holds */
	p = rb_first(root);
	while (p && i < npages) {
		fwp = rb_entry(p, struct fw_page, rb_node);
		p = rb_next(p);

		i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
	}

	MLX5_SET(manage_pages_out, out, output_num_entries, i);
	return 0;
}
520 
/* Ask FW to return up to @npages pages for @func_id via
 * MANAGE_PAGES(op_mod TAKE) and free each returned 4K chunk. @event is
 * true when driven by a FW event, in which case an FW-side -EREMOTEIO
 * is ignored. On success stores the reclaimed count in *nclaimed (when
 * non-NULL) and decrements the page counters accordingly.
 */
static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
			 int *nclaimed, bool event, bool ec_function)
{
	u32 function = get_function(func_id, ec_function);
	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
	int num_claimed;
	u16 func_type;
	u32 *out;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);

	mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n",
		      func_id, npages, outlen);
	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
	if (err) {
		/* account the whole request as discarded */
		npages = MLX5_GET(manage_pages_in, in, input_num_entries);
		dev->priv.reclaim_pages_discard += npages;
	}
	/* if triggered by FW event and failed by FW then ignore */
	if (event && err == -EREMOTEIO) {
		err = 0;
		goto out_free;
	}

	err = mlx5_cmd_check(dev, err, in, out);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
		goto out_free;
	}

	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
	/* FW must never return more entries than the driver asked for */
	if (num_claimed > npages) {
		mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
			       num_claimed, npages);
		err = -EINVAL;
		goto out_free;
	}

	for (i = 0; i < num_claimed; i++)
		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function);

	if (nclaimed)
		*nclaimed = num_claimed;

	func_type = func_id_to_type(dev, func_id, ec_function);
	dev->priv.page_counters[func_type] -= num_claimed;
	dev->priv.fw_pages -= num_claimed;

out_free:
	kvfree(out);
	return err;
}
588 
589 static void pages_work_handler(struct work_struct *work)
590 {
591 	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
592 	struct mlx5_core_dev *dev = req->dev;
593 	int err = 0;
594 
595 	if (req->release_all)
596 		release_all_pages(dev, req->func_id, req->ec_function);
597 	else if (req->npages < 0)
598 		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
599 				    true, req->ec_function);
600 	else if (req->npages > 0)
601 		err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
602 
603 	if (err)
604 		mlx5_core_warn(dev, "%s fail %d\n",
605 			       req->npages < 0 ? "reclaim" : "give", err);
606 
607 	kfree(req);
608 }
609 
/* Flag bits carried in the EQE's 16-bit ec_function field. */
enum {
	EC_FUNCTION_MASK = 0x8000,
	RELEASE_ALL_PAGES_MASK = 0x4000,
};
614 
/* This limit is based on the capability of the firmware as it cannot release
 * more than 50000 back to the host in one go. Kept negative because FW
 * reclaim requests arrive as negative npages values.
 */
#define MAX_RECLAIM_NPAGES (-50000)
619 
/* PAGE_REQUEST EQE notifier. Runs in atomic context: decodes the
 * request and queues a work item, since the FW commands issued by the
 * handlers may sleep.
 */
static int req_pages_handler(struct notifier_block *nb,
			     unsigned long type, void *data)
{
	struct mlx5_pages_req *req;
	struct mlx5_core_dev *dev;
	struct mlx5_priv *priv;
	struct mlx5_eqe *eqe;
	bool ec_function;
	bool release_all;
	u16 func_id;
	s32 npages;

	priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
	dev  = container_of(priv, struct mlx5_core_dev, priv);
	eqe  = data;

	func_id = be16_to_cpu(eqe->data.req_pages.func_id);
	npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
	/* the ec_function field multiplexes two flag bits */
	ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
	release_all = be16_to_cpu(eqe->data.req_pages.ec_function) &
		      RELEASE_ALL_PAGES_MASK;
	mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n",
		      func_id, npages, release_all);
	req = kzalloc_obj(*req, GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return NOTIFY_DONE;
	}

	req->dev = dev;
	req->func_id = func_id;

	/* npages > 0 means HCA asking host to allocate/give pages,
	 * npages < 0 means HCA asking host to reclaim back the pages allocated.
	 * Here we are restricting the maximum number of pages that can be
	 * reclaimed to be MAX_RECLAIM_NPAGES. Note that MAX_RECLAIM_NPAGES is
	 * a negative value.
	 * Since MAX_RECLAIM is negative, we are using max() to restrict
	 * req->npages (and not min ()).
	 */
	req->npages = max_t(s32, npages, MAX_RECLAIM_NPAGES);
	req->ec_function = ec_function;
	req->release_all = release_all;
	INIT_WORK(&req->work, pages_work_handler);
	queue_work(dev->priv.pg_wq, &req->work);
	return NOTIFY_OK;
}
667 
668 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
669 {
670 	u16 func_id;
671 	s32 npages;
672 	int err;
673 
674 	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
675 	if (err)
676 		return err;
677 
678 	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
679 		      npages, boot ? "boot" : "init", func_id);
680 
681 	if (!npages)
682 		return 0;
683 
684 	return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
685 }
686 
/* Number of command mailbox blocks used to size one reclaim batch. */
enum {
	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
};
690 
691 static int optimal_reclaimed_pages(void)
692 {
693 	struct mlx5_cmd_prot_block *block;
694 	struct mlx5_cmd_layout *lay;
695 	int ret;
696 
697 	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
698 	       MLX5_ST_SZ_BYTES(manage_pages_out)) /
699 	       MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
700 
701 	return ret;
702 }
703 
/* Reclaim every page tracked in @root for the packed @function key,
 * batching optimal_reclaimed_pages() entries per command. The deadline
 * is refreshed whenever FW makes progress; if FW stalls past the
 * timeout we warn and give up (returning 0, not an error).
 */
static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
				   struct rb_root *root, u32 function)
{
	u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
	unsigned long end = jiffies + recl_pages_to_jiffies;

	while (!RB_EMPTY_ROOT(root)) {
		u32 ec_function = mlx5_get_ec_function(function);
		u32 function_id = mlx5_get_func_id(function);
		int nclaimed;
		int err;

		err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(),
				    &nclaimed, false, ec_function);
		if (err) {
			mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n",
				       err, function_id, ec_function);
			return err;
		}

		/* progress: extend the deadline */
		if (nclaimed)
			end = jiffies + recl_pages_to_jiffies;

		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
			break;
		}
	}

	return 0;
}
735 
/* Reclaim everything handed to FW for every function and tear down all
 * per-function trees. WARNs if any page counter is still non-zero
 * afterwards, which would indicate leaked accounting.
 */
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	struct rb_root *root;
	unsigned long id;
	void *entry;

	xa_for_each(&dev->priv.page_root_xa, id, entry) {
		root = entry;
		mlx5_reclaim_root_pages(dev, root, id);
		xa_erase(&dev->priv.page_root_xa, id);
		kfree(root);
	}

	WARN_ON(!xa_empty(&dev->priv.page_root_xa));

	WARN(dev->priv.fw_pages,
	     "FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.fw_pages);
	WARN(dev->priv.page_counters[MLX5_VF],
	     "VFs FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.page_counters[MLX5_VF]);
	WARN(dev->priv.page_counters[MLX5_HOST_PF],
	     "External host PF FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.page_counters[MLX5_HOST_PF]);
	WARN(dev->priv.page_counters[MLX5_EC_VF],
	     "EC VFs FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.page_counters[MLX5_EC_VF]);

	return 0;
}
766 
767 int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
768 {
769 	INIT_LIST_HEAD(&dev->priv.free_list);
770 	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
771 	if (!dev->priv.pg_wq)
772 		return -ENOMEM;
773 
774 	xa_init(&dev->priv.page_root_xa);
775 	mlx5_pages_debugfs_init(dev);
776 
777 	return 0;
778 }
779 
780 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
781 {
782 	mlx5_pages_debugfs_cleanup(dev);
783 	xa_destroy(&dev->priv.page_root_xa);
784 	destroy_workqueue(dev->priv.pg_wq);
785 }
786 
/* Register for PAGE_REQUEST events; page handling becomes active. */
void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
	mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
}
792 
/* Stop receiving PAGE_REQUEST events and drain any queued page work. */
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
	flush_workqueue(dev->priv.pg_wq);
}
798 
799 int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
800 {
801 	u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
802 	unsigned long end = jiffies + recl_vf_pages_to_jiffies;
803 	int prev_pages = *pages;
804 
805 	/* In case of internal error we will free the pages manually later */
806 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
807 		mlx5_core_warn(dev, "Skipping wait for vf pages stage");
808 		return 0;
809 	}
810 
811 	mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages);
812 	while (*pages) {
813 		if (time_after(jiffies, end)) {
814 			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
815 			return -ETIMEDOUT;
816 		}
817 		if (*pages < prev_pages) {
818 			end = jiffies + recl_vf_pages_to_jiffies;
819 			prev_pages = *pages;
820 		}
821 		msleep(50);
822 	}
823 
824 	mlx5_core_dbg(dev, "All pages received\n");
825 	return 0;
826 }
827