xref: /linux/drivers/gpu/drm/ttm/ttm_pool.c (revision 51d24842acb9b8d643046c71314cc3d7a846a3cf)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2020 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors: Christian König
24  */
25 
26 /* Pooling of allocated pages is necessary because changing the caching
27  * attributes of the linear mapping on x86 requires a costly cross-CPU TLB
28  * invalidate for those addresses.
29  *
30  * In addition, allocations from the DMA coherent API are pooled as well
31  * because they are rather slow compared to alloc_pages+map.
32  */
33 
34 #include <linux/export.h>
35 #include <linux/module.h>
36 #include <linux/dma-mapping.h>
37 #include <linux/debugfs.h>
38 #include <linux/highmem.h>
39 #include <linux/sched/mm.h>
40 
41 #ifdef CONFIG_X86
42 #include <asm/set_memory.h>
43 #endif
44 
45 #include <drm/ttm/ttm_backup.h>
46 #include <drm/ttm/ttm_pool.h>
47 #include <drm/ttm/ttm_tt.h>
48 #include <drm/ttm/ttm_bo.h>
49 
50 #include "ttm_module.h"
51 #include "ttm_pool_internal.h"
52 
53 #ifdef CONFIG_FAULT_INJECTION
54 #include <linux/fault-inject.h>
55 static DECLARE_FAULT_ATTR(backup_fault_inject);
56 #else
57 #define should_fail(...) false
58 #endif
59 
60 /**
61  * struct ttm_pool_dma - Helper object for coherent DMA mappings
62  *
63  * @addr: original DMA address returned for the mapping
64  * @vaddr: original vaddr returned for the mapping, with the order stored in the lower bits
65  */
66 struct ttm_pool_dma {
67 	dma_addr_t addr;
68 	unsigned long vaddr;
69 };
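
/*
 * A minimal illustration (not part of the driver) of how the order is packed
 * into the low bits of @vaddr: since the pointer returned by dma_alloc_attrs()
 * is at least PAGE_SIZE aligned, the low PAGE_SHIFT bits are free, so
 * ttm_pool_alloc_page() and ttm_pool_page_order() do:
 *
 *	dma->vaddr = (unsigned long)vaddr | order;	pack
 *	order = dma->vaddr & ~PAGE_MASK;		unpack
 *	vaddr = (void *)(dma->vaddr & PAGE_MASK);	recover the pointer
 */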
70 
71 /**
72  * struct ttm_pool_alloc_state - Current state of the tt page allocation process
73  * @pages: Pointer to the next tt page pointer to populate.
74  * @caching_divide: Pointer to the first page pointer whose page has a staged but
75  * not committed caching transition from write-back to @tt_caching.
76  * @dma_addr: Pointer to the next tt dma_address entry to populate if any.
77  * @remaining_pages: Remaining pages to populate.
78  * @tt_caching: The requested cpu-caching for the pages allocated.
79  */
80 struct ttm_pool_alloc_state {
81 	struct page **pages;
82 	struct page **caching_divide;
83 	dma_addr_t *dma_addr;
84 	pgoff_t remaining_pages;
85 	enum ttm_caching tt_caching;
86 };
87 
88 /**
89  * struct ttm_pool_tt_restore - State representing restore from backup
90  * @pool: The pool used for page allocation while restoring.
91  * @snapshot_alloc: A snapshot of the most recent struct ttm_pool_alloc_state.
92  * @alloced_page: Pointer to the page most recently allocated from a pool or system.
93  * @first_dma: The dma address corresponding to @alloced_page if dma_mapping
94  * is requested.
95  * @alloced_pages: The number of allocated pages present in the struct ttm_tt
96  * page vector from this restore session.
97  * @restored_pages: The number of 4K pages restored for @alloced_page (which
98  * is typically a multi-order page).
99  * @page_caching: The caching requested by the struct ttm_tt.
100  * @order: The order of @alloced_page.
101  *
102  * Recovery from backup might fail when we've recovered less than the
103  * full ttm_tt. In order not to lose any data (yet), keep information
104  * around that allows us to restart a failed ttm backup recovery.
105  */
106 struct ttm_pool_tt_restore {
107 	struct ttm_pool *pool;
108 	struct ttm_pool_alloc_state snapshot_alloc;
109 	struct page *alloced_page;
110 	dma_addr_t first_dma;
111 	pgoff_t alloced_pages;
112 	pgoff_t restored_pages;
113 	enum ttm_caching page_caching;
114 	unsigned int order;
115 };
116 
117 static unsigned long page_pool_size;
118 
119 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool per NUMA node");
120 module_param(page_pool_size, ulong, 0644);
121 
122 static unsigned long pool_node_limit[MAX_NUMNODES];
123 static atomic_long_t allocated_pages[MAX_NUMNODES];
124 
125 static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
126 static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];
127 
128 static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS];
129 static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS];
130 
131 static spinlock_t shrinker_lock;
132 static struct list_head shrinker_list;
133 static struct shrinker *mm_shrinker;
134 static DECLARE_RWSEM(pool_shrink_rwsem);
135 
136 static int ttm_pool_nid(struct ttm_pool *pool)
137 {
138 	int nid = NUMA_NO_NODE;
139 	if (pool)
140 		nid = pool->nid;
141 	if (nid == NUMA_NO_NODE)
142 		nid = numa_node_id();
143 	return nid;
144 }
145 
146 /* Allocate pages of size 1 << order with the given gfp_flags */
147 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
148 					unsigned int order)
149 {
150 	const unsigned int beneficial_order = ttm_pool_beneficial_order(pool);
151 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
152 	struct ttm_pool_dma *dma;
153 	struct page *p;
154 	void *vaddr;
155 
156 	/* Don't set the __GFP_COMP flag for higher order allocations.
157 	 * Mapping pages directly into a userspace process and calling
158 	 * put_page() on a TTM allocated page is illegal.
159 	 */
160 	if (order)
161 		gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
162 			__GFP_THISNODE;
163 
164 	/*
165 	 * Do not add latency to the allocation path for allocation orders that
166 	 * the device told us bring no additional performance gain.
167 	 */
168 	if (beneficial_order && order > beneficial_order)
169 		gfp_flags &= ~__GFP_DIRECT_RECLAIM;
170 
171 	if (!ttm_pool_uses_dma_alloc(pool)) {
172 		p = alloc_pages_node(pool->nid, gfp_flags, order);
173 		if (p) {
174 			p->private = order;
175 			mod_lruvec_page_state(p, NR_GPU_ACTIVE, 1 << order);
176 		}
177 		return p;
178 	}
179 
180 	dma = kmalloc_obj(*dma);
181 	if (!dma)
182 		return NULL;
183 
184 	if (order)
185 		attr |= DMA_ATTR_NO_WARN;
186 
187 	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
188 				&dma->addr, gfp_flags, attr);
189 	if (!vaddr)
190 		goto error_free;
191 
192 	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
193 	 * TTM page fault handling and extend the DMA API to clean this up.
194 	 */
195 	if (is_vmalloc_addr(vaddr))
196 		p = vmalloc_to_page(vaddr);
197 	else
198 		p = virt_to_page(vaddr);
199 
200 	dma->vaddr = (unsigned long)vaddr | order;
201 	p->private = (unsigned long)dma;
202 	return p;
203 
204 error_free:
205 	kfree(dma);
206 	return NULL;
207 }
208 
209 static void __free_pages_gpu_account(struct page *p, unsigned int order,
210 				     bool reclaim)
211 {
212 	mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE,
213 			      -(1 << order));
214 	__free_pages(p, order);
215 }
216 
217 /* Reset the caching and pages of size 1 << order */
218 static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
219 			       unsigned int order, struct page *p, bool reclaim)
220 {
221 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
222 	struct ttm_pool_dma *dma;
223 	void *vaddr;
224 
225 #ifdef CONFIG_X86
226 	/* We don't care that set_pages_wb is inefficient here. This is only
227 	 * used when we have to shrink and CPU overhead is irrelevant then.
228 	 */
229 	if (caching != ttm_cached && !PageHighMem(p))
230 		set_pages_wb(p, 1 << order);
231 #endif
232 
233 	if (!pool || !ttm_pool_uses_dma_alloc(pool)) {
234 		__free_pages_gpu_account(p, order, reclaim);
235 		return;
236 	}
237 
238 	if (order)
239 		attr |= DMA_ATTR_NO_WARN;
240 
241 	dma = (void *)p->private;
242 	vaddr = (void *)(dma->vaddr & PAGE_MASK);
243 	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
244 		       attr);
245 	kfree(dma);
246 }
247 
248 /* Apply any cpu-caching deferred during page allocation */
249 static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
250 {
251 #ifdef CONFIG_X86
252 	unsigned int num_pages = alloc->pages - alloc->caching_divide;
253 
254 	if (!num_pages)
255 		return 0;
256 
257 	switch (alloc->tt_caching) {
258 	case ttm_cached:
259 		break;
260 	case ttm_write_combined:
261 		return set_pages_array_wc(alloc->caching_divide, num_pages);
262 	case ttm_uncached:
263 		return set_pages_array_uc(alloc->caching_divide, num_pages);
264 	}
265 #endif
266 	alloc->caching_divide = alloc->pages;
267 	return 0;
268 }
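
/*
 * Note on the deferred-caching bookkeeping: the entries in
 * [alloc->caching_divide, alloc->pages) are still write-back cached and
 * staged for a transition to @tt_caching, so a single set_pages_array_wc()
 * or set_pages_array_uc() call (and therefore one TLB flush) covers the
 * whole batch. After a successful transition the divide is advanced to
 * alloc->pages.
 */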
269 
270 /* DMA Map pages of 1 << order size and return the resulting dma_address. */
271 static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
272 			struct page *p, dma_addr_t *dma_addr)
273 {
274 	dma_addr_t addr;
275 
276 	if (ttm_pool_uses_dma_alloc(pool)) {
277 		struct ttm_pool_dma *dma = (void *)p->private;
278 
279 		addr = dma->addr;
280 	} else {
281 		size_t size = (1ULL << order) * PAGE_SIZE;
282 
283 		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
284 		if (dma_mapping_error(pool->dev, addr))
285 			return -EFAULT;
286 	}
287 
288 	*dma_addr = addr;
289 
290 	return 0;
291 }
292 
293 /* Unmap pages of 1 << order size */
294 static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
295 			   unsigned int num_pages)
296 {
297 	/* Unmapped while freeing the page */
298 	if (ttm_pool_uses_dma_alloc(pool))
299 		return;
300 
301 	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
302 		       DMA_BIDIRECTIONAL);
303 }
304 
305 /* Give pages into a specific pool_type */
306 static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
307 {
308 	unsigned int i, num_pages = 1 << pt->order;
309 	int nid = page_to_nid(p);
310 
311 	for (i = 0; i < num_pages; ++i) {
312 		if (PageHighMem(p))
313 			clear_highpage(p + i);
314 		else
315 			clear_page(page_address(p + i));
316 	}
317 
318 	INIT_LIST_HEAD(&p->lru);
319 	rcu_read_lock();
320 	list_lru_add(&pt->pages, &p->lru, nid, NULL);
321 	rcu_read_unlock();
322 
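	/* Track the per-node total and move the pages from active to
	 * reclaimable GPU memory accounting.
	 */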
323 	atomic_long_add(num_pages, &allocated_pages[nid]);
324 	mod_lruvec_page_state(p, NR_GPU_ACTIVE, -num_pages);
325 	mod_lruvec_page_state(p, NR_GPU_RECLAIM, num_pages);
326 }
327 
328 static enum lru_status take_one_from_lru(struct list_head *item,
329 					 struct list_lru_one *list,
330 					 void *cb_arg)
331 {
332 	struct page **out_page = cb_arg;
333 	struct page *p = container_of(item, struct page, lru);
334 	list_lru_isolate(list, item);
335 
336 	*out_page = p;
337 	return LRU_REMOVED;
338 }
339 
340 /* Take pages from a specific pool_type, return NULL when nothing available */
341 static struct page *ttm_pool_type_take(struct ttm_pool_type *pt, int nid)
342 {
343 	int ret;
344 	struct page *p = NULL;
345 	unsigned long nr_to_walk = 1;
346 
347 	ret = list_lru_walk_node(&pt->pages, nid, take_one_from_lru, (void *)&p, &nr_to_walk);
348 	if (ret == 1 && p) {
349 		atomic_long_sub(1 << pt->order, &allocated_pages[nid]);
350 		mod_lruvec_page_state(p, NR_GPU_ACTIVE, (1 << pt->order));
351 		mod_lruvec_page_state(p, NR_GPU_RECLAIM, -(1 << pt->order));
352 	}
353 	return p;
354 }
355 
356 /* Initialize and add a pool type to the global shrinker list */
357 static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
358 			       enum ttm_caching caching, unsigned int order)
359 {
360 	pt->pool = pool;
361 	pt->caching = caching;
362 	pt->order = order;
363 	list_lru_init(&pt->pages);
364 
365 	spin_lock(&shrinker_lock);
366 	list_add_tail(&pt->shrinker_list, &shrinker_list);
367 	spin_unlock(&shrinker_lock);
368 }
369 
370 static enum lru_status pool_move_to_dispose_list(struct list_head *item,
371 						 struct list_lru_one *list,
372 						 void *cb_arg)
373 {
374 	struct list_head *dispose = cb_arg;
375 
376 	list_lru_isolate_move(list, item, dispose);
377 
378 	return LRU_REMOVED;
379 }
380 
381 static void ttm_pool_dispose_list(struct ttm_pool_type *pt,
382 				  struct list_head *dispose)
383 {
384 	while (!list_empty(dispose)) {
385 		struct page *p;
386 		p = list_first_entry(dispose, struct page, lru);
387 		list_del_init(&p->lru);
388 		atomic_long_sub(1 << pt->order, &allocated_pages[page_to_nid(p)]);
389 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p, true);
390 	}
391 }
392 
393 /* Remove a pool_type from the global shrinker list and free all pages */
394 static void ttm_pool_type_fini(struct ttm_pool_type *pt)
395 {
396 	LIST_HEAD(dispose);
397 
398 	spin_lock(&shrinker_lock);
399 	list_del(&pt->shrinker_list);
400 	spin_unlock(&shrinker_lock);
401 
402 	list_lru_walk(&pt->pages, pool_move_to_dispose_list, &dispose, LONG_MAX);
403 	ttm_pool_dispose_list(pt, &dispose);
404 }
405 
406 /* Return the pool_type to use for the given caching and order */
407 static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
408 						  enum ttm_caching caching,
409 						  unsigned int order)
410 {
411 	if (ttm_pool_uses_dma_alloc(pool))
412 		return &pool->caching[caching].orders[order];
413 
414 #ifdef CONFIG_X86
415 	switch (caching) {
416 	case ttm_write_combined:
417 		if (ttm_pool_uses_dma32(pool))
418 			return &global_dma32_write_combined[order];
419 
420 		return &global_write_combined[order];
421 	case ttm_uncached:
422 		if (ttm_pool_uses_dma32(pool))
423 			return &global_dma32_uncached[order];
424 
425 		return &global_uncached[order];
426 	default:
427 		break;
428 	}
429 #endif
430 
431 	return NULL;
432 }
433 
434 /* Free pages using the per-node shrinker list */
435 static unsigned int ttm_pool_shrink(int nid, unsigned long num_to_free)
436 {
437 	LIST_HEAD(dispose);
438 	struct ttm_pool_type *pt;
439 	unsigned int num_pages;
440 
441 	down_read(&pool_shrink_rwsem);
442 	spin_lock(&shrinker_lock);
443 	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
444 	list_move_tail(&pt->shrinker_list, &shrinker_list);
445 	spin_unlock(&shrinker_lock);
446 
447 	num_pages = list_lru_walk_node(&pt->pages, nid, pool_move_to_dispose_list, &dispose, &num_to_free);
448 	num_pages *= 1 << pt->order;
449 
450 	ttm_pool_dispose_list(pt, &dispose);
451 	up_read(&pool_shrink_rwsem);
452 
453 	return num_pages;
454 }
455 
456 /* Return the allocation order used for a page */
457 static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
458 {
459 	if (ttm_pool_uses_dma_alloc(pool)) {
460 		struct ttm_pool_dma *dma = (void *)p->private;
461 
462 		return dma->vaddr & ~PAGE_MASK;
463 	}
464 
465 	return p->private;
466 }
467 
468 /*
469  * Split larger pages so that we can free each PAGE_SIZE page as soon
470  * as it has been backed up, in order to avoid memory pressure during
471  * reclaim.
472  */
473 static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
474 {
475 	unsigned int order = ttm_pool_page_order(pool, p);
476 	pgoff_t nr;
477 
478 	if (!order)
479 		return;
480 
481 	split_page(p, order);
482 	nr = 1UL << order;
483 	while (nr--)
484 		(p++)->private = 0;
485 }
486 
487 /**
488  * DOC: Partial backup and restoration of a struct ttm_tt.
489  *
490  * Swapout using ttm_backup_backup_page() and swapin using
491  * ttm_backup_copy_page() may fail.
492  * The former most likely due to lack of swap-space or memory, the latter due
493  * to lack of memory or because of signal interruption during waits.
494  *
495  * Backup failure is easily handled by using a ttm_tt pages vector that holds
496  * both backup handles and page pointers. This has to be taken into account when
497  * restoring such a ttm_tt from backup, and when freeing it while backed up.
498  * When restoring, for simplicity, new pages are actually allocated from the
499  * pool and the contents of any old pages are copied in and then the old pages
500  * are released.
501  *
502  * For restoration failures, the struct ttm_pool_tt_restore holds sufficient state
503  * to be able to resume an interrupted restore, and that structure is freed once
504  * the restoration is complete. If the struct ttm_tt is destroyed while there
505  * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
506  * care of.
507  */
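
/*
 * Illustrative sketch (not part of the implementation): an entry of the
 * tt->pages vector is either a real page pointer or an encoded backup handle,
 * and the ttm_backup helpers used below tell the two apart:
 *
 *	if (ttm_backup_page_ptr_is_handle(p))
 *		handle = ttm_backup_page_ptr_to_handle(p);
 *
 * A handle of 0 means the entry has already been restored (or was never
 * backed up), a plain non-NULL page is one retained because backing it up
 * failed, and restored entries are overwritten with
 * ttm_backup_handle_to_page_ptr(0).
 */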
508 
509 /* Is restore ongoing for the currently allocated page? */
510 static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
511 {
512 	return restore && restore->restored_pages < (1 << restore->order);
513 }
514 
515 /* DMA unmap and free a multi-order page, either to the relevant pool or to system. */
516 static pgoff_t ttm_pool_unmap_and_free(struct ttm_pool *pool, struct page *page,
517 				       const dma_addr_t *dma_addr, enum ttm_caching caching)
518 {
519 	struct ttm_pool_type *pt = NULL;
520 	unsigned int order;
521 	pgoff_t nr;
522 
523 	if (pool) {
524 		order = ttm_pool_page_order(pool, page);
525 		nr = (1UL << order);
526 		if (dma_addr)
527 			ttm_pool_unmap(pool, *dma_addr, nr);
528 
529 		pt = ttm_pool_select_type(pool, caching, order);
530 	} else {
531 		order = page->private;
532 		nr = (1UL << order);
533 	}
534 
535 	if (pt)
536 		ttm_pool_type_give(pt, page);
537 	else
538 		ttm_pool_free_page(pool, caching, order, page, false);
539 
540 	return nr;
541 }
542 
543 /* Populate the page-array using the most recently allocated multi-order page. */
544 static void ttm_pool_allocated_page_commit(struct page *allocated,
545 					   dma_addr_t first_dma,
546 					   struct ttm_pool_alloc_state *alloc,
547 					   pgoff_t nr)
548 {
549 	pgoff_t i;
550 
551 	for (i = 0; i < nr; ++i)
552 		*alloc->pages++ = allocated++;
553 
554 	alloc->remaining_pages -= nr;
555 
556 	if (!alloc->dma_addr)
557 		return;
558 
559 	for (i = 0; i < nr; ++i) {
560 		*alloc->dma_addr++ = first_dma;
561 		first_dma += PAGE_SIZE;
562 	}
563 }
564 
565 /*
566  * When restoring, restore backed-up content to the newly allocated page and
567  * if successful, populate the page-table and dma-address arrays.
568  */
569 static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore,
570 				   struct file *backup,
571 				   const struct ttm_operation_ctx *ctx,
572 				   struct ttm_pool_alloc_state *alloc)
573 
574 {
575 	pgoff_t i, nr = 1UL << restore->order;
576 	struct page **first_page = alloc->pages;
577 	struct page *p;
578 	int ret = 0;
579 
580 	for (i = restore->restored_pages; i < nr; ++i) {
581 		p = first_page[i];
582 		if (ttm_backup_page_ptr_is_handle(p)) {
583 			unsigned long handle = ttm_backup_page_ptr_to_handle(p);
584 			gfp_t additional_gfp = ctx->gfp_retry_mayfail ?
585 				__GFP_RETRY_MAYFAIL | __GFP_NOWARN : 0;
586 
587 			if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
588 			    should_fail(&backup_fault_inject, 1)) {
589 				ret = -EINTR;
590 				break;
591 			}
592 
593 			if (handle == 0) {
594 				restore->restored_pages++;
595 				continue;
596 			}
597 
598 			ret = ttm_backup_copy_page(backup, restore->alloced_page + i,
599 						   handle, ctx->interruptible,
600 						   additional_gfp);
601 			if (ret)
602 				break;
603 
604 			ttm_backup_drop(backup, handle);
605 		} else if (p) {
606 			/*
607 			 * We could probably avoid splitting the old page
608 			 * using clever logic, but ATM we don't care, as
609 			 * we prioritize releasing memory ASAP. Note that
610 			 * here, the old retained page is always write-back
611 			 * cached.
612 			 */
613 			ttm_pool_split_for_swap(restore->pool, p);
614 			copy_highpage(restore->alloced_page + i, p);
615 			__free_pages_gpu_account(p, 0, false);
616 		}
617 
618 		restore->restored_pages++;
619 		first_page[i] = ttm_backup_handle_to_page_ptr(0);
620 	}
621 
622 	if (ret) {
623 		if (!restore->restored_pages) {
624 			dma_addr_t *dma_addr = alloc->dma_addr ? &restore->first_dma : NULL;
625 
626 			ttm_pool_unmap_and_free(restore->pool, restore->alloced_page,
627 						dma_addr, restore->page_caching);
628 			restore->restored_pages = nr;
629 		}
630 		return ret;
631 	}
632 
633 	ttm_pool_allocated_page_commit(restore->alloced_page, restore->first_dma,
634 				       alloc, nr);
635 	if (restore->page_caching == alloc->tt_caching || PageHighMem(restore->alloced_page))
636 		alloc->caching_divide = alloc->pages;
637 	restore->snapshot_alloc = *alloc;
638 	restore->alloced_pages += nr;
639 
640 	return 0;
641 }
642 
643 /* If restoring, save information needed for ttm_pool_restore_commit(). */
644 static void
645 ttm_pool_page_allocated_restore(struct ttm_pool *pool, unsigned int order,
646 				struct page *p,
647 				enum ttm_caching page_caching,
648 				dma_addr_t first_dma,
649 				struct ttm_pool_tt_restore *restore,
650 				const struct ttm_pool_alloc_state *alloc)
651 {
652 	restore->pool = pool;
653 	restore->order = order;
654 	restore->restored_pages = 0;
655 	restore->page_caching = page_caching;
656 	restore->first_dma = first_dma;
657 	restore->alloced_page = p;
658 	restore->snapshot_alloc = *alloc;
659 }
660 
661 /*
662  * Called when we got a page, either from a pool or newly allocated.
663  * If needed, dma map the page and populate the dma address array.
664  * Populate the page address array.
665  * If the caching is consistent, update any deferred caching. Otherwise
666  * stage this page for an upcoming deferred caching update.
667  */
668 static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
669 				   struct page *p, enum ttm_caching page_caching,
670 				   struct ttm_pool_alloc_state *alloc,
671 				   struct ttm_pool_tt_restore *restore)
672 {
673 	bool caching_consistent;
674 	dma_addr_t first_dma;
675 	int r = 0;
676 
677 	caching_consistent = (page_caching == alloc->tt_caching) || PageHighMem(p);
678 
679 	if (caching_consistent) {
680 		r = ttm_pool_apply_caching(alloc);
681 		if (r)
682 			return r;
683 	}
684 
685 	if (alloc->dma_addr) {
686 		r = ttm_pool_map(pool, order, p, &first_dma);
687 		if (r)
688 			return r;
689 	}
690 
691 	if (restore) {
692 		ttm_pool_page_allocated_restore(pool, order, p, page_caching,
693 						first_dma, restore, alloc);
694 	} else {
695 		ttm_pool_allocated_page_commit(p, first_dma, alloc, 1UL << order);
696 
697 		if (caching_consistent)
698 			alloc->caching_divide = alloc->pages;
699 	}
700 
701 	return 0;
702 }
703 
704 /**
705  * ttm_pool_free_range() - Free a range of TTM pages
706  * @pool: The pool used for allocating.
707  * @tt: The struct ttm_tt holding the page pointers.
708  * @caching: The page caching mode used by the range.
709  * @start_page: index for first page to free.
710  * @end_page: index for last page to free + 1.
711  *
712  * During allocation the ttm_tt page-vector may be populated with ranges of
713  * pages with different attributes if allocation hit an error without being
714  * able to completely fulfill the allocation. This function can be used
715  * to free these individual ranges.
716  */
717 static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
718 				enum ttm_caching caching,
719 				pgoff_t start_page, pgoff_t end_page)
720 {
721 	struct page **pages = &tt->pages[start_page];
722 	struct file *backup = tt->backup;
723 	pgoff_t i, nr;
724 
725 	for (i = start_page; i < end_page; i += nr, pages += nr) {
726 		struct page *p = *pages;
727 
728 		nr = 1;
729 		if (ttm_backup_page_ptr_is_handle(p)) {
730 			unsigned long handle = ttm_backup_page_ptr_to_handle(p);
731 
732 			if (handle != 0)
733 				ttm_backup_drop(backup, handle);
734 		} else if (p) {
735 			dma_addr_t *dma_addr = tt->dma_address ?
736 				tt->dma_address + i : NULL;
737 
738 			nr = ttm_pool_unmap_and_free(pool, p, dma_addr, caching);
739 		}
740 	}
741 }
742 
743 static void ttm_pool_alloc_state_init(const struct ttm_tt *tt,
744 				      struct ttm_pool_alloc_state *alloc)
745 {
746 	alloc->pages = tt->pages;
747 	alloc->caching_divide = tt->pages;
748 	alloc->dma_addr = tt->dma_address;
749 	alloc->remaining_pages = tt->num_pages;
750 	alloc->tt_caching = tt->caching;
751 }
752 
753 /*
754  * Find a suitable allocation order based on highest desired order
755  * and number of remaining pages
756  */
757 static unsigned int ttm_pool_alloc_find_order(unsigned int highest,
758 					      const struct ttm_pool_alloc_state *alloc)
759 {
760 	return min_t(unsigned int, highest, __fls(alloc->remaining_pages));
761 }
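
/*
 * Worked example: with 7 pages remaining and @highest == MAX_PAGE_ORDER,
 * __fls(7) == 2, so an order 2 (4 page) allocation is attempted first,
 * followed by order 1 and finally order 0 as the remaining count drops
 * to 3 and then 1.
 */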
762 
763 static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
764 			    const struct ttm_operation_ctx *ctx,
765 			    struct ttm_pool_alloc_state *alloc,
766 			    struct ttm_pool_tt_restore *restore)
767 {
768 	enum ttm_caching page_caching;
769 	gfp_t gfp_flags = GFP_USER;
770 	pgoff_t caching_divide;
771 	unsigned int order;
772 	bool allow_pools;
773 	struct page *p;
774 	int r;
775 
776 	WARN_ON(!alloc->remaining_pages || ttm_tt_is_populated(tt));
777 	WARN_ON(alloc->dma_addr && !pool->dev);
778 
779 	if (tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
780 		gfp_flags |= __GFP_ZERO;
781 
782 	if (ctx->gfp_retry_mayfail)
783 		gfp_flags |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
784 
785 	if (ttm_pool_uses_dma32(pool))
786 		gfp_flags |= GFP_DMA32;
787 	else
788 		gfp_flags |= GFP_HIGHUSER;
789 
790 	page_caching = tt->caching;
791 	allow_pools = true;
792 	for (order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
793 	     alloc->remaining_pages;
794 	     order = ttm_pool_alloc_find_order(order, alloc)) {
795 		struct ttm_pool_type *pt;
796 
797 		/* First, try to allocate a page from a pool if one exists. */
798 		p = NULL;
799 		pt = ttm_pool_select_type(pool, page_caching, order);
800 		if (pt && allow_pools)
801 			p = ttm_pool_type_take(pt, ttm_pool_nid(pool));
802 
803 		/*
804 		 * If that fails or previously failed, allocate from system.
805 		 * Note that this also disallows additional pool allocations using
806 		 * write-back cached pools of the same order. Consider removing
807 		 * that behaviour.
808 		 */
809 		if (!p) {
810 			page_caching = ttm_cached;
811 			allow_pools = false;
812 			p = ttm_pool_alloc_page(pool, gfp_flags, order);
813 		}
814 		/* If that fails, lower the order if possible and retry. */
815 		if (!p) {
816 			if (order) {
817 				--order;
818 				page_caching = tt->caching;
819 				allow_pools = true;
820 				continue;
821 			}
822 			r = -ENOMEM;
823 			goto error_free_all;
824 		}
825 		r = ttm_pool_page_allocated(pool, order, p, page_caching, alloc,
826 					    restore);
827 		if (r)
828 			goto error_free_page;
829 
830 		if (ttm_pool_restore_valid(restore)) {
831 			r = ttm_pool_restore_commit(restore, tt->backup, ctx, alloc);
832 			if (r)
833 				goto error_free_all;
834 		}
835 	}
836 
837 	r = ttm_pool_apply_caching(alloc);
838 	if (r)
839 		goto error_free_all;
840 
841 	kfree(tt->restore);
842 	tt->restore = NULL;
843 
844 	return 0;
845 
846 error_free_page:
847 	ttm_pool_free_page(pool, page_caching, order, p, false);
848 
849 error_free_all:
850 	if (tt->restore)
851 		return r;
852 
853 	caching_divide = alloc->caching_divide - tt->pages;
854 	ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
855 	ttm_pool_free_range(pool, tt, ttm_cached, caching_divide,
856 			    tt->num_pages - alloc->remaining_pages);
857 
858 	return r;
859 }
860 
861 /**
862  * ttm_pool_alloc - Fill a ttm_tt object
863  *
864  * @pool: ttm_pool to use
865  * @tt: ttm_tt object to fill
866  * @ctx: operation context
867  *
868  * Fill the ttm_tt object with pages and also make sure to DMA map them when
869  * necessary.
870  *
871  * Returns: 0 on success, negative error code otherwise.
872  */
873 int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
874 		   struct ttm_operation_ctx *ctx)
875 {
876 	struct ttm_pool_alloc_state alloc;
877 
878 	if (WARN_ON(ttm_tt_is_backed_up(tt)))
879 		return -EINVAL;
880 
881 	ttm_pool_alloc_state_init(tt, &alloc);
882 
883 	return __ttm_pool_alloc(pool, tt, ctx, &alloc, NULL);
884 }
885 EXPORT_SYMBOL(ttm_pool_alloc);
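
/*
 * Sketch of typical usage (driver names are hypothetical): drivers normally
 * call this from their ttm_device_funcs::ttm_tt_populate hook, using the pool
 * embedded in their ttm_device:
 *
 *	static int mydrv_ttm_tt_populate(struct ttm_device *bdev,
 *					 struct ttm_tt *tt,
 *					 struct ttm_operation_ctx *ctx)
 *	{
 *		return ttm_pool_alloc(&bdev->pool, tt, ctx);
 *	}
 */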
886 
887 /**
888  * ttm_pool_restore_and_alloc - Fill a ttm_tt, restoring previously backed-up
889  * content.
890  *
891  * @pool: ttm_pool to use
892  * @tt: ttm_tt object to fill
893  * @ctx: operation context
894  *
895  * Fill the ttm_tt object with pages and also make sure to DMA map them when
896  * necessary. Read in backed-up content.
897  *
898  * Returns: 0 on success, negative error code otherwise.
899  */
900 int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
901 			       const struct ttm_operation_ctx *ctx)
902 {
903 	struct ttm_pool_tt_restore *restore = tt->restore;
904 	struct ttm_pool_alloc_state alloc;
905 
906 	if (WARN_ON(!ttm_tt_is_backed_up(tt)))
907 		return -EINVAL;
908 
909 	if (!restore) {
910 		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
911 
912 		ttm_pool_alloc_state_init(tt, &alloc);
913 		if (ctx->gfp_retry_mayfail)
914 			gfp |= __GFP_RETRY_MAYFAIL;
915 
916 		restore = kzalloc_obj(*restore, gfp);
917 		if (!restore)
918 			return -ENOMEM;
919 
920 		restore->snapshot_alloc = alloc;
921 		restore->pool = pool;
922 		restore->restored_pages = 1;
923 
924 		tt->restore = restore;
925 	} else {
926 		alloc = restore->snapshot_alloc;
927 		if (ttm_pool_restore_valid(restore)) {
928 			int ret = ttm_pool_restore_commit(restore, tt->backup,
929 							  ctx, &alloc);
930 
931 			if (ret)
932 				return ret;
933 		}
934 		if (!alloc.remaining_pages)
935 			return 0;
936 	}
937 
938 	return __ttm_pool_alloc(pool, tt, ctx, &alloc, restore);
939 }
940 
941 /**
942  * ttm_pool_free - Free the backing pages from a ttm_tt object
943  *
944  * @pool: Pool to give pages back to.
945  * @tt: ttm_tt object to unpopulate
946  *
947  * Give the backing pages back to a pool or free them
948  */
949 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
950 {
951 	int nid = ttm_pool_nid(pool);
952 
953 	ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);
954 
955 	while (atomic_long_read(&allocated_pages[nid]) > pool_node_limit[nid]) {
956 		unsigned long diff = atomic_long_read(&allocated_pages[nid]) - pool_node_limit[nid];
957 		ttm_pool_shrink(nid, diff);
958 	}
959 }
960 EXPORT_SYMBOL(ttm_pool_free);
961 
962 /**
963  * ttm_pool_drop_backed_up() - Release content of a swapped-out struct ttm_tt
964  * @tt: The struct ttm_tt.
965  *
966  * Release handles with associated content or any remaining pages of
967  * a backed-up struct ttm_tt.
968  */
969 void ttm_pool_drop_backed_up(struct ttm_tt *tt)
970 {
971 	struct ttm_pool_tt_restore *restore;
972 	pgoff_t start_page = 0;
973 
974 	WARN_ON(!ttm_tt_is_backed_up(tt));
975 
976 	restore = tt->restore;
977 
978 	/*
979 	 * Unmap and free any uncommitted restore page.
980 	 * Any tt page-array backup entries that have already been read
981 	 * back have been cleared.
982 	 */
983 	if (ttm_pool_restore_valid(restore)) {
984 		dma_addr_t *dma_addr = tt->dma_address ? &restore->first_dma : NULL;
985 
986 		ttm_pool_unmap_and_free(restore->pool, restore->alloced_page,
987 					dma_addr, restore->page_caching);
988 		restore->restored_pages = 1UL << restore->order;
989 	}
990 
991 	/*
992 	 * If a restore is ongoing, part of the tt pages may have a
993 	 * caching different than writeback.
994 	 */
995 	if (restore) {
996 		pgoff_t mid = restore->snapshot_alloc.caching_divide - tt->pages;
997 
998 		start_page = restore->alloced_pages;
999 		WARN_ON(mid > start_page);
1000 		/* Pages that might be dma-mapped and non-cached */
1001 		ttm_pool_free_range(restore->pool, tt, tt->caching,
1002 				    0, mid);
1003 		/* Pages that might be dma-mapped but cached */
1004 		ttm_pool_free_range(restore->pool, tt, ttm_cached,
1005 				    mid, restore->alloced_pages);
1006 		kfree(restore);
1007 		tt->restore = NULL;
1008 	}
1009 
1010 	ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt->num_pages);
1011 }
1012 
1013 /**
1014  * ttm_pool_backup() - Back up or purge a struct ttm_tt
1015  * @pool: The pool used when allocating the struct ttm_tt.
1016  * @tt: The struct ttm_tt.
1017  * @flags: Flags to govern the backup behaviour.
1018  *
1019  * Back up or purge a struct ttm_tt. If @purge is true, then
1020  * all pages will be freed directly to the system rather than to the pool
1021  * they were allocated from, making the function behave similarly to
1022  * ttm_pool_free(). If @purge is false the pages will be backed up instead,
1023  * exchanged for handles.
1024  * A subsequent call to ttm_pool_restore_and_alloc() will then read back the content and
1025  * a subsequent call to ttm_pool_drop_backed_up() will drop it.
1026  * If backup of a page fails for whatever reason, @tt will still be
1027  * partially backed up, retaining those pages for which backup fails.
1028  * In that case, this function can be retried, possibly after freeing up
1029  * memory resources.
1030  *
1031  * Return: Number of pages actually backed up or freed, or negative
1032  * error code on error.
1033  */
1034 long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
1035 		     const struct ttm_backup_flags *flags)
1036 {
1037 	struct file *backup = tt->backup;
1038 	struct page *page;
1039 	unsigned long handle;
1040 	gfp_t alloc_gfp;
1041 	gfp_t gfp;
1042 	int ret = 0;
1043 	pgoff_t shrunken = 0;
1044 	pgoff_t i, num_pages;
1045 
1046 	if (WARN_ON(ttm_tt_is_backed_up(tt)))
1047 		return -EINVAL;
1048 
1049 	if ((!ttm_backup_bytes_avail() && !flags->purge) ||
1050 	    ttm_pool_uses_dma_alloc(pool) || ttm_tt_is_backed_up(tt))
1051 		return -EBUSY;
1052 
1053 #ifdef CONFIG_X86
1054 	/* Anything returned to the system needs to be cached. */
1055 	if (tt->caching != ttm_cached)
1056 		set_pages_array_wb(tt->pages, tt->num_pages);
1057 #endif
1058 
1059 	if (tt->dma_address || flags->purge) {
1060 		for (i = 0; i < tt->num_pages; i += num_pages) {
1061 			unsigned int order;
1062 
1063 			page = tt->pages[i];
1064 			if (unlikely(!page)) {
1065 				num_pages = 1;
1066 				continue;
1067 			}
1068 
1069 			order = ttm_pool_page_order(pool, page);
1070 			num_pages = 1UL << order;
1071 			if (tt->dma_address)
1072 				ttm_pool_unmap(pool, tt->dma_address[i],
1073 					       num_pages);
1074 			if (flags->purge) {
1075 				shrunken += num_pages;
1076 				page->private = 0;
1077 				__free_pages_gpu_account(page, order, false);
1078 				memset(tt->pages + i, 0,
1079 				       num_pages * sizeof(*tt->pages));
1080 			}
1081 		}
1082 	}
1083 
1084 	if (flags->purge)
1085 		return shrunken;
1086 
1087 	if (ttm_pool_uses_dma32(pool))
1088 		gfp = GFP_DMA32;
1089 	else
1090 		gfp = GFP_HIGHUSER;
1091 
1092 	alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
1093 
1094 	num_pages = tt->num_pages;
1095 
1096 	/* Simulate a backup fault by shrinking only half of the pages. */
1097 	if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
1098 		num_pages = DIV_ROUND_UP(num_pages, 2);
1099 
1100 	for (i = 0; i < num_pages; ++i) {
1101 		s64 shandle;
1102 
1103 		page = tt->pages[i];
1104 		if (unlikely(!page))
1105 			continue;
1106 
1107 		ttm_pool_split_for_swap(pool, page);
1108 
1109 		shandle = ttm_backup_backup_page(backup, page, flags->writeback, i,
1110 						 gfp, alloc_gfp);
1111 		if (shandle < 0) {
1112 			/* We allow partially shrunken tts */
1113 			ret = shandle;
1114 			break;
1115 		}
1116 		handle = shandle;
1117 		tt->pages[i] = ttm_backup_handle_to_page_ptr(handle);
1118 		__free_pages_gpu_account(page, 0, false);
1119 		shrunken++;
1120 	}
1121 
1122 	return shrunken ? shrunken : ret;
1123 }
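
/*
 * Sketch of a possible caller (names are hypothetical): a shrinker path
 * would typically back the tt up while allowing writeback, keeping whatever
 * could not be backed up for a later retry:
 *
 *	struct ttm_backup_flags flags = { .writeback = true };
 *	long ret = ttm_pool_backup(&bdev->pool, tt, &flags);
 *
 * A negative return here means no pages were backed up this time; the call
 * may be retried after freeing up memory.
 */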
1124 
1125 /**
1126  * ttm_pool_init - Initialize a pool
1127  *
1128  * @pool: the pool to initialize
1129  * @dev: device for DMA allocations and mappings
1130  * @nid: NUMA node to use for allocations
1131  * @alloc_flags: TTM_ALLOCATION_POOL_* flags
1132  *
1133  * Initialize the pool and its pool types.
1134  */
1135 void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
1136 		   int nid, unsigned int alloc_flags)
1137 {
1138 	unsigned int i, j;
1139 
1140 	WARN_ON(!dev && ttm_pool_uses_dma_alloc(pool));
1141 
1142 	pool->dev = dev;
1143 	pool->nid = nid;
1144 	pool->alloc_flags = alloc_flags;
1145 
1146 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
1147 		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
1148 			struct ttm_pool_type *pt;
1149 
1150 			/* Initialize only pool types which are actually used */
1151 			pt = ttm_pool_select_type(pool, i, j);
1152 			if (pt != &pool->caching[i].orders[j])
1153 				continue;
1154 
1155 			ttm_pool_type_init(pt, pool, i, j);
1156 		}
1157 	}
1158 }
1159 EXPORT_SYMBOL(ttm_pool_init);
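
/*
 * Sketch of the expected pairing (illustrative only): the pool lives as long
 * as the device, with ttm_pool_init() at device init and ttm_pool_fini() at
 * teardown:
 *
 *	ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), alloc_flags);
 *	...
 *	ttm_pool_fini(&bdev->pool);
 *
 * where alloc_flags is a mask of TTM_ALLOCATION_POOL_* flags.
 */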
1160 
1161 /**
1162  * ttm_pool_synchronize_shrinkers - Wait for all running shrinkers to complete.
1163  *
1164  * This is useful to guarantee that all shrinker invocations have seen an
1165  * update, before freeing memory, similar to rcu.
1166  */
1167 static void ttm_pool_synchronize_shrinkers(void)
1168 {
1169 	down_write(&pool_shrink_rwsem);
1170 	up_write(&pool_shrink_rwsem);
1171 }
1172 
1173 /**
1174  * ttm_pool_fini - Cleanup a pool
1175  *
1176  * @pool: the pool to clean up
1177  *
1178  * Free all pages in the pool and unregister the types from the global
1179  * shrinker.
1180  */
1181 void ttm_pool_fini(struct ttm_pool *pool)
1182 {
1183 	unsigned int i, j;
1184 
1185 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
1186 		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
1187 			struct ttm_pool_type *pt;
1188 
1189 			pt = ttm_pool_select_type(pool, i, j);
1190 			if (pt != &pool->caching[i].orders[j])
1191 				continue;
1192 
1193 			ttm_pool_type_fini(pt);
1194 		}
1195 	}
1196 
1197 	/* We removed the pool types from the LRU, but we need to also make sure
1198 	 * that no shrinker is concurrently freeing pages from the pool.
1199 	 */
1200 	ttm_pool_synchronize_shrinkers();
1201 }
1202 EXPORT_SYMBOL(ttm_pool_fini);
1203 
1204 /* Shrinker batch: free roughly the average number of pages held in a pool. */
1205 #define TTM_SHRINKER_BATCH ((1 << (MAX_PAGE_ORDER / 2)) * NR_PAGE_ORDERS)
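
/*
 * With the common MAX_PAGE_ORDER of 10 (and thus NR_PAGE_ORDERS == 11) this
 * works out to (1 << 5) * 11 = 352 pages per shrinker batch.
 */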
1206 
1207 static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
1208 					    struct shrink_control *sc)
1209 {
1210 	unsigned long num_freed = 0;
1211 
1212 	do
1213 		num_freed += ttm_pool_shrink(sc->nid, sc->nr_to_scan);
1214 	while (num_freed < sc->nr_to_scan &&
1215 	       atomic_long_read(&allocated_pages[sc->nid]));
1216 
1217 	sc->nr_scanned = num_freed;
1218 
1219 	return num_freed ?: SHRINK_STOP;
1220 }
1221 
1222 /* Return the number of pages available or SHRINK_EMPTY if we have none */
1223 static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
1224 					     struct shrink_control *sc)
1225 {
1226 	unsigned long num_pages = atomic_long_read(&allocated_pages[sc->nid]);
1227 
1228 	return num_pages ? num_pages : SHRINK_EMPTY;
1229 }
1230 
1231 #ifdef CONFIG_DEBUG_FS
1232 /* Count the number of pages available in a pool_type */
1233 static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
1234 {
1235 	return list_lru_count(&pt->pages);
1236 }
1237 
1238 /* Print a nice header for the order */
1239 static void ttm_pool_debugfs_header(struct seq_file *m)
1240 {
1241 	unsigned int i;
1242 
1243 	seq_puts(m, "\t ");
1244 	for (i = 0; i < NR_PAGE_ORDERS; ++i)
1245 		seq_printf(m, " ---%2u---", i);
1246 	seq_puts(m, "\n");
1247 }
1248 
1249 /* Dump information about the different pool types */
1250 static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
1251 				    struct seq_file *m)
1252 {
1253 	unsigned int i;
1254 
1255 	for (i = 0; i < NR_PAGE_ORDERS; ++i)
1256 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
1257 	seq_puts(m, "\n");
1258 }
1259 
1260 /* Dump the total amount of allocated pages */
1261 static void ttm_pool_debugfs_footer(struct seq_file *m)
1262 {
1263 	int nid;
1264 
1265 	for_each_node(nid) {
1266 		seq_printf(m, "\ntotal node%d\t: %8lu of %8lu\n", nid,
1267 			   atomic_long_read(&allocated_pages[nid]), pool_node_limit[nid]);
1268 	}
1269 }
1270 
1271 /* Dump the information for the global pools */
1272 static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
1273 {
1274 	ttm_pool_debugfs_header(m);
1275 
1276 	spin_lock(&shrinker_lock);
1277 	seq_puts(m, "wc\t:");
1278 	ttm_pool_debugfs_orders(global_write_combined, m);
1279 	seq_puts(m, "uc\t:");
1280 	ttm_pool_debugfs_orders(global_uncached, m);
1281 	seq_puts(m, "wc 32\t:");
1282 	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
1283 	seq_puts(m, "uc 32\t:");
1284 	ttm_pool_debugfs_orders(global_dma32_uncached, m);
1285 	spin_unlock(&shrinker_lock);
1286 
1287 	ttm_pool_debugfs_footer(m);
1288 
1289 	return 0;
1290 }
1291 DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals);
1292 
1293 /**
1294  * ttm_pool_debugfs - Debugfs dump function for a pool
1295  *
1296  * @pool: the pool to dump the information for
1297  * @m: seq_file to dump to
1298  *
1299  * Make a debugfs dump with the per pool and global information.
1300  */
1301 int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
1302 {
1303 	unsigned int i;
1304 
1305 	if (!ttm_pool_uses_dma_alloc(pool)) {
1306 		seq_puts(m, "unused\n");
1307 		return 0;
1308 	}
1309 
1310 	ttm_pool_debugfs_header(m);
1311 
1312 	spin_lock(&shrinker_lock);
1313 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
1314 		if (!ttm_pool_select_type(pool, i, 0))
1315 			continue;
1316 		seq_puts(m, "DMA ");
1317 		switch (i) {
1318 		case ttm_cached:
1319 			seq_puts(m, "\t:");
1320 			break;
1321 		case ttm_write_combined:
1322 			seq_puts(m, "wc\t:");
1323 			break;
1324 		case ttm_uncached:
1325 			seq_puts(m, "uc\t:");
1326 			break;
1327 		}
1328 		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
1329 	}
1330 	spin_unlock(&shrinker_lock);
1331 
1332 	ttm_pool_debugfs_footer(m);
1333 	return 0;
1334 }
1335 EXPORT_SYMBOL(ttm_pool_debugfs);
1336 
1337 /* Test the shrinker functions and dump the result */
1338 static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data)
1339 {
1340 	struct shrink_control sc = {
1341 		.gfp_mask = GFP_NOFS,
1342 		.nr_to_scan = TTM_SHRINKER_BATCH,
1343 	};
1344 	unsigned long count;
1345 	int nid;
1346 
1347 	fs_reclaim_acquire(GFP_KERNEL);
1348 	for_each_node(nid) {
1349 		sc.nid = nid;
1350 		count = ttm_pool_shrinker_count(mm_shrinker, &sc);
1351 		seq_printf(m, "%d: %lu/%lu\n", nid, count,
1352 			   ttm_pool_shrinker_scan(mm_shrinker, &sc));
1353 	}
1354 	fs_reclaim_release(GFP_KERNEL);
1355 
1356 	return 0;
1357 }
1358 DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
1359 
1360 #endif
1361 
1362 static inline u64 ttm_get_node_memory_size(int nid)
1363 {
1364 	/*
1365 	 * This directly mirrors the si_meminfo_node() implementation as that
1366 	 * function is not exported.
1367 	 */
1368 	int zone_type;
1369 	u64 managed_pages = 0;
1370 
1371 	pg_data_t *pgdat = NODE_DATA(nid);
1372 
1373 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
1374 		managed_pages +=
1375 			zone_managed_pages(&pgdat->node_zones[zone_type]);
1376 	return managed_pages * PAGE_SIZE;
1377 }
1378 
1379 /**
1380  * ttm_pool_mgr_init - Initialize globals
1381  *
1382  * @num_pages: default number of pages
1383  *
1384  * Initialize the global locks and lists for the MM shrinker.
1385  */
1386 int ttm_pool_mgr_init(unsigned long num_pages)
1387 {
1388 	unsigned int i;
1389 
1390 	int nid;
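	/* Default each node's pool limit to half of that node's managed memory. */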
1391 	for_each_node(nid) {
1392 		if (!page_pool_size) {
1393 			u64 node_size = ttm_get_node_memory_size(nid);
1394 			pool_node_limit[nid] = (node_size >> PAGE_SHIFT) / 2;
1395 		} else {
1396 			pool_node_limit[nid] = page_pool_size;
1397 		}
1398 	}
1399 
1400 	spin_lock_init(&shrinker_lock);
1401 	INIT_LIST_HEAD(&shrinker_list);
1402 
1403 	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
1404 		ttm_pool_type_init(&global_write_combined[i], NULL,
1405 				   ttm_write_combined, i);
1406 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
1407 
1408 		ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
1409 				   ttm_write_combined, i);
1410 		ttm_pool_type_init(&global_dma32_uncached[i], NULL,
1411 				   ttm_uncached, i);
1412 	}
1413 
1414 #ifdef CONFIG_DEBUG_FS
1415 	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
1416 			    &ttm_pool_debugfs_globals_fops);
1417 	debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
1418 			    &ttm_pool_debugfs_shrink_fops);
1419 #ifdef CONFIG_FAULT_INJECTION
1420 	fault_create_debugfs_attr("backup_fault_inject", ttm_debugfs_root,
1421 				  &backup_fault_inject);
1422 #endif
1423 #endif
1424 
1425 	mm_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "drm-ttm_pool");
1426 	if (!mm_shrinker)
1427 		return -ENOMEM;
1428 
1429 	mm_shrinker->count_objects = ttm_pool_shrinker_count;
1430 	mm_shrinker->scan_objects = ttm_pool_shrinker_scan;
1431 	mm_shrinker->batch = TTM_SHRINKER_BATCH;
1432 	mm_shrinker->seeks = 1;
1433 
1434 	shrinker_register(mm_shrinker);
1435 
1436 	return 0;
1437 }
1438 
1439 /**
1440  * ttm_pool_mgr_fini - Finalize globals
1441  *
1442  * Cleanup the global pools and unregister the MM shrinker.
1443  */
1444 void ttm_pool_mgr_fini(void)
1445 {
1446 	unsigned int i;
1447 
1448 	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
1449 		ttm_pool_type_fini(&global_write_combined[i]);
1450 		ttm_pool_type_fini(&global_uncached[i]);
1451 
1452 		ttm_pool_type_fini(&global_dma32_write_combined[i]);
1453 		ttm_pool_type_fini(&global_dma32_uncached[i]);
1454 	}
1455 
1456 	shrinker_free(mm_shrinker);
1457 	WARN_ON(!list_empty(&shrinker_list));
1458 }
1459