xref: /linux/drivers/iommu/amd/io_pgtable.c (revision 3a39d672e7f48b8d6b91a09afa4b55352773b4b5)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"

/*
 * Helper function to get the first pte of a large mapping
 */
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long pte_mask, pg_size, cnt;
	u64 *fpte;

	pg_size  = PTE_PAGE_SIZE(*pte);
	cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
	pte_mask = ~((cnt << 3) - 1);
	fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

	if (page_size)
		*page_size = pg_size;

	if (count)
		*count = cnt;

	return fpte;
}
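
/*
 * Worked example (illustrative, not from the upstream source): for a
 * 64 KiB level-7 mapping, PTE_PAGE_SIZE() reports pg_size = 0x10000 and,
 * assuming the usual PAGE_SIZE_PTE_COUNT() definition from
 * amd_iommu_types.h, cnt = 16 replicated 8-byte PTEs.  cnt << 3 is then
 * 128, so pte_mask = ~0x7fUL and
 *
 *	fpte = (u64 *)((unsigned long)pte & ~0x7fUL);
 *
 * points at the first entry of the 16-entry block, which starts on a
 * 128-byte boundary because the IOVA of the mapping is size-aligned.
 */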

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/

static void free_pt_page(u64 *pt, struct list_head *freelist)
{
	struct page *p = virt_to_page(pt);

	list_add_tail(&p->lru, freelist);
}

static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
{
	u64 *p;
	int i;

	for (i = 0; i < 512; ++i) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		/* Large PTE? */
		if (PM_PTE_LEVEL(pt[i]) == 0 ||
		    PM_PTE_LEVEL(pt[i]) == 7)
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = IOMMU_PTE_PAGE(pt[i]);
		if (lvl > 2)
			free_pt_lvl(p, freelist, lvl - 1);
		else
			free_pt_page(p, freelist);
	}

	free_pt_page(pt, freelist);
}

static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		free_pt_page(root, freelist);
		break;
	case PAGE_MODE_2_LEVEL:
	case PAGE_MODE_3_LEVEL:
	case PAGE_MODE_4_LEVEL:
	case PAGE_MODE_5_LEVEL:
	case PAGE_MODE_6_LEVEL:
		free_pt_lvl(root, freelist, mode);
		break;
	default:
		BUG();
	}
}

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
static bool increase_address_space(struct amd_io_pgtable *pgtable,
				   unsigned long address,
				   gfp_t gfp)
{
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	struct protection_domain *domain =
		container_of(pgtable, struct protection_domain, iop);
	unsigned long flags;
	bool ret = true;
	u64 *pte;

	pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
	if (!pte)
		return false;

	spin_lock_irqsave(&domain->lock, flags);

	if (address <= PM_LEVEL_SIZE(pgtable->mode))
		goto out;

	ret = false;
	if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
		goto out;

	*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));

	pgtable->root  = pte;
	pgtable->mode += 1;
	amd_iommu_update_and_flush_device_table(domain);

	pte = NULL;
	ret = true;

out:
	spin_unlock_irqrestore(&domain->lock, flags);
	iommu_free_page(pte);

	return ret;
}
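
/*
 * Worked example (illustrative, not from the upstream source): with
 * pgtable->mode == PAGE_MODE_3_LEVEL the table covers IOVAs up to
 * PM_LEVEL_SIZE(3), i.e. just under 2^39.  Mapping an IOVA such as
 * (1ULL << 40) therefore triggers one call to increase_address_space():
 * the freshly allocated page becomes the new level-4 root, its first PDE
 * points at the old level-3 root, mode is bumped to PAGE_MODE_4_LEVEL
 * and the device table is republished before the new root is used.
 */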

static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long page_size,
		      u64 **pte_page,
		      gfp_t gfp,
		      bool *updated)
{
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	int level, end_lvl;
	u64 *pte, *page;

	BUG_ON(!is_power_of_2(page_size));

	while (address > PM_LEVEL_SIZE(pgtable->mode)) {
		/*
		 * Return an error if there is no memory to update the
		 * page-table.
		 */
		if (!increase_address_space(pgtable, address, gfp))
			return NULL;
	}


	level   = pgtable->mode - 1;
	pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	address = PAGE_SIZE_ALIGN(address, page_size);
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		u64 __pte, __npte;
		int pte_level;

		__pte     = *pte;
		pte_level = PM_PTE_LEVEL(__pte);

		/*
		 * If we replace a series of large PTEs, we need
		 * to tear down all of them.
		 */
		if (IOMMU_PTE_PRESENT(__pte) &&
		    pte_level == PAGE_MODE_7_LEVEL) {
			unsigned long count, i;
			u64 *lpte;

			lpte = first_pte_l7(pte, NULL, &count);

			/*
			 * Unmap the replicated PTEs that still match the
			 * original large mapping
			 */
			for (i = 0; i < count; ++i)
				cmpxchg64(&lpte[i], __pte, 0ULL);

			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte) ||
		    pte_level == PAGE_MODE_NONE) {
			page = iommu_alloc_page_node(cfg->amd.nid, gfp);

			if (!page)
				return NULL;

			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

			/* pte could have been changed somewhere. */
			if (!try_cmpxchg64(pte, &__pte, __npte))
				iommu_free_page(page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		/* No level skipping support yet */
		if (pte_level != level)
			return NULL;

		level -= 1;

		pte = IOMMU_PTE_PAGE(__pte);

		if (pte_page && level == end_lvl)
			*pte_page = pte;

		pte = &pte[PM_LEVEL_INDEX(level, address)];
	}

	return pte;
}
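
/*
 * Worked example (illustrative, not from the upstream source): the walk
 * in alloc_pte() stops at end_lvl = PAGE_SIZE_LEVEL(page_size).
 * Assuming the usual definition ((__ffs(pgsize) - 12) / 9), a 4 KiB or
 * 64 KiB mapping ends in a level-0 table (end_lvl = 0), while a 2 MiB
 * mapping stops one level higher (end_lvl = 1); the returned pte is the
 * slot that will hold the leaf entry, or the first of the replicated
 * level-7 entries.
 */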

/*
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long *page_size)
{
	int level;
	u64 *pte;

	*page_size = 0;

	if (address > PM_LEVEL_SIZE(pgtable->mode))
		return NULL;

	level	   =  pgtable->mode - 1;
	pte	   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	*page_size =  PTE_LEVEL_PAGE_SIZE(level);

	while (level > 0) {

		/* Not Present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Large PTE */
		if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
		    PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
			break;

		/* No level skipping support yet */
		if (PM_PTE_LEVEL(*pte) != level)
			return NULL;

		level -= 1;

		/* Walk to the next level */
		pte	   = IOMMU_PTE_PAGE(*pte);
		pte	   = &pte[PM_LEVEL_INDEX(level, address)];
		*page_size = PTE_LEVEL_PAGE_SIZE(level);
	}

	/*
	 * If we have a series of large PTEs, make
	 * sure to return a pointer to the first one.
	 */
	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
		pte = first_pte_l7(pte, page_size, NULL);

	return pte;
}

static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
{
	u64 *pt;
	int mode;

	while (!try_cmpxchg64(pte, &pteval, 0))
		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");

	if (!IOMMU_PTE_PRESENT(pteval))
		return;

	pt   = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	LIST_HEAD(freelist);
	bool updated = false;
	u64 __pte, *pte;
	int ret, i, count;
	size_t size = pgcount << __ffs(pgsize);
	unsigned long o_iova = iova;

	BUG_ON(!IS_ALIGNED(iova, pgsize));
	BUG_ON(!IS_ALIGNED(paddr, pgsize));

	ret = -EINVAL;
	if (!(prot & IOMMU_PROT_MASK))
		goto out;

	while (pgcount > 0) {
		count = PAGE_SIZE_PTE_COUNT(pgsize);
		pte   = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);

		ret = -ENOMEM;
		if (!pte)
			goto out;

		for (i = 0; i < count; ++i)
			free_clear_pte(&pte[i], pte[i], &freelist);

		if (!list_empty(&freelist))
			updated = true;

		if (count > 1) {
			__pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
			__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
		} else
			__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

		if (prot & IOMMU_PROT_IR)
			__pte |= IOMMU_PTE_IR;
		if (prot & IOMMU_PROT_IW)
			__pte |= IOMMU_PTE_IW;

		for (i = 0; i < count; ++i)
			pte[i] = __pte;

		iova  += pgsize;
		paddr += pgsize;
		pgcount--;
		if (mapped)
			*mapped += pgsize;
	}

	ret = 0;

out:
	if (updated) {
		struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
		unsigned long flags;

		spin_lock_irqsave(&dom->lock, flags);
		/*
		 * Flush domain TLB(s) and wait for completion. Any Device-Table
		 * Updates and flushing already happened in
		 * increase_address_space().
		 */
		amd_iommu_domain_flush_pages(dom, o_iova, size);
		spin_unlock_irqrestore(&dom->lock, flags);
	}

	/* Everything flushed out, free pages now */
	iommu_put_pages_list(&freelist);

	return ret;
}

static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
					  unsigned long iova,
					  size_t pgsize, size_t pgcount,
					  struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long long unmapped;
	unsigned long unmap_size;
	u64 *pte;
	size_t size = pgcount << __ffs(pgsize);

	BUG_ON(!is_power_of_2(pgsize));

	unmapped = 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (pte) {
			int i, count;

			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
				pte[i] = 0ULL;
		} else {
			return unmapped;
		}

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	return unmapped;
}

static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);

	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte	    = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}
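
/*
 * Worked example (illustrative, not from the upstream source): if
 * fetch_pte() finds the iova inside a 2 MiB mapping, pte_pgsize is
 * 0x200000 and offset_mask is 0x1fffff.  The physical address returned
 * is the frame taken from the PTE (with any memory-encryption bit
 * cleared by __sme_clr()) plus the low 21 bits of the iova.
 */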

static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
				     unsigned long flags)
{
	bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
	bool dirty = false;
	int i, count;

	/*
	 * 2.2.3.2 Host Dirty Support
	 * When a non-default page size is used, software must OR the
	 * Dirty bits in all of the replicated host PTEs used to map
	 * the page. The IOMMU does not guarantee the Dirty bits are
	 * set in all of the replicated PTEs. Any portion of the page
	 * may have been written even if the Dirty bit is set in only
	 * one of the replicated PTEs.
	 */
	count = PAGE_SIZE_PTE_COUNT(size);
	for (i = 0; i < count && test_only; i++) {
		if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
			dirty = true;
			break;
		}
	}

	for (i = 0; i < count && !test_only; i++) {
		if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
				       (unsigned long *)&ptep[i])) {
			dirty = true;
		}
	}

	return dirty;
}
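
/*
 * Worked example (illustrative, not from the upstream source): a 32 KiB
 * mapping is made of PAGE_SIZE_PTE_COUNT(SZ_32K) == 8 replicated PTEs.
 * If the IOMMU set the host-dirty bit in only one replica, the helper
 * above still reports the mapping dirty and, unless IOMMU_DIRTY_NO_CLEAR
 * was passed, clears the bit in every replica so the next scan starts
 * from a clean state.
 */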

static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
					 unsigned long iova, size_t size,
					 unsigned long flags,
					 struct iommu_dirty_bitmap *dirty)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long end = iova + size - 1;

	do {
		unsigned long pgsize = 0;
		u64 *ptep, pte;

		ptep = fetch_pte(pgtable, iova, &pgsize);
		if (ptep)
			pte = READ_ONCE(*ptep);
		if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
			pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
			iova += pgsize;
			continue;
		}

		/*
		 * Mark the whole IOVA range as dirty even if only one of
		 * the replicated PTEs was marked dirty.
		 */
		if (pte_test_and_clear_dirty(ptep, pgsize, flags))
			iommu_dirty_bitmap_record(dirty, iova, pgsize);
		iova += pgsize;
	} while (iova < end);

	return 0;
}

/*
 * ----------------------------------------------------
 */
static void v1_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
	LIST_HEAD(freelist);

	if (pgtable->mode == PAGE_MODE_NONE)
		return;

	/* Page-table is not visible to IOMMU anymore, so free it */
	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
	       pgtable->mode > PAGE_MODE_6_LEVEL);

	free_sub_pt(pgtable->root, pgtable->mode, &freelist);
	iommu_put_pages_list(&freelist);
}

static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

	pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
	if (!pgtable->root)
		return NULL;
	pgtable->mode = PAGE_MODE_3_LEVEL;

	cfg->pgsize_bitmap  = amd_iommu_pgsize_bitmap;
	cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE;

	pgtable->pgtbl.ops.map_pages    = iommu_v1_map_pages;
	pgtable->pgtbl.ops.unmap_pages  = iommu_v1_unmap_pages;
	pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
	pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;

	return &pgtable->pgtbl;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
	.alloc	= v1_alloc_pgtable,
	.free	= v1_free_pgtable,
};