// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"

/*
 * Helper function to get the first pte of a large mapping
 */
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long pte_mask, pg_size, cnt;
	u64 *fpte;

	pg_size = PTE_PAGE_SIZE(*pte);
	cnt     = PAGE_SIZE_PTE_COUNT(pg_size);
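	/*
	 * Each PTE is 8 bytes, so cnt << 3 is the byte span of the
	 * replicated PTE group; masking the pointer with the inverse
	 * rounds it down to the first PTE of that group.
	 */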
	pte_mask = ~((cnt << 3) - 1);
	fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

	if (page_size)
		*page_size = pg_size;

	if (count)
		*count = cnt;

	return fpte;
}

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/

static void free_pt_page(u64 *pt, struct list_head *freelist)
{
	struct page *p = virt_to_page(pt);

	list_add_tail(&p->lru, freelist);
}

static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
{
	u64 *p;
	int i;

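	/* A page-table page holds 512 eight-byte entries (4 KiB / 8). */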
	for (i = 0; i < 512; ++i) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		/* Large PTE? */
		if (PM_PTE_LEVEL(pt[i]) == 0 ||
		    PM_PTE_LEVEL(pt[i]) == 7)
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = IOMMU_PTE_PAGE(pt[i]);
		if (lvl > 2)
			free_pt_lvl(p, freelist, lvl - 1);
		else
			free_pt_page(p, freelist);
	}

	free_pt_page(pt, freelist);
}

static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		free_pt_page(root, freelist);
		break;
	case PAGE_MODE_2_LEVEL:
	case PAGE_MODE_3_LEVEL:
	case PAGE_MODE_4_LEVEL:
	case PAGE_MODE_5_LEVEL:
	case PAGE_MODE_6_LEVEL:
		free_pt_lvl(root, freelist, mode);
		break;
	default:
		BUG();
	}
}

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
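/*
 * For reference: each level translates 9 bits of IOVA on top of the 12-bit
 * page offset, so a 3-level table covers a 39-bit space and a 6-level table
 * reaches the full 64 bits.
 */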
static bool increase_address_space(struct amd_io_pgtable *pgtable,
				   unsigned long address,
				   gfp_t gfp)
{
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	struct protection_domain *domain =
		container_of(pgtable, struct protection_domain, iop);
	unsigned long flags;
	bool ret = true;
	u64 *pte;

	pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
	if (!pte)
		return false;

	spin_lock_irqsave(&domain->lock, flags);

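	/*
	 * Re-check under the domain lock: another path may have grown the
	 * table while we were allocating, in which case the fresh page is
	 * simply freed on the way out.
	 */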
	if (address <= PM_LEVEL_SIZE(pgtable->mode))
		goto out;

	ret = false;
	if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
		goto out;

	*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));

	pgtable->root = pte;
	pgtable->mode += 1;
	amd_iommu_update_and_flush_device_table(domain);

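	/* Success: the page is now the root, so don't free it below. */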
	pte = NULL;
	ret = true;

out:
	spin_unlock_irqrestore(&domain->lock, flags);
	iommu_free_page(pte);

	return ret;
}

static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long page_size,
		      u64 **pte_page,
		      gfp_t gfp,
		      bool *updated)
{
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	int level, end_lvl;
	u64 *pte, *page;

	BUG_ON(!is_power_of_2(page_size));

	while (address > PM_LEVEL_SIZE(pgtable->mode)) {
		/*
		 * Return an error if there is no memory to update the
		 * page-table.
		 */
		if (!increase_address_space(pgtable, address, gfp))
			return NULL;
	}

	level = pgtable->mode - 1;
	pte   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	address = PAGE_SIZE_ALIGN(address, page_size);
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		u64 __pte, __npte;
		int pte_level;

		__pte     = *pte;
		pte_level = PM_PTE_LEVEL(__pte);

		/*
		 * If we replace a series of large PTEs, we need
		 * to tear down all of them.
		 */
		if (IOMMU_PTE_PRESENT(__pte) &&
		    pte_level == PAGE_MODE_7_LEVEL) {
			unsigned long count, i;
			u64 *lpte;

			lpte = first_pte_l7(pte, NULL, &count);

			/*
			 * Unmap the replicated PTEs that still match the
			 * original large mapping
			 */
			for (i = 0; i < count; ++i)
				cmpxchg64(&lpte[i], __pte, 0ULL);

			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte) ||
		    pte_level == PAGE_MODE_NONE) {
			page = iommu_alloc_page_node(cfg->amd.nid, gfp);

			if (!page)
				return NULL;

			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

			/* The PTE may have been changed concurrently; install only if unchanged. */
			if (!try_cmpxchg64(pte, &__pte, __npte))
				iommu_free_page(page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

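			/* Retry this level so the walk picks up whichever table got installed. */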
			continue;
		}

		/* No level skipping support yet */
		if (pte_level != level)
			return NULL;

		level -= 1;

		pte = IOMMU_PTE_PAGE(__pte);

		if (pte_page && level == end_lvl)
			*pte_page = pte;

		pte = &pte[PM_LEVEL_INDEX(level, address)];
	}

	return pte;
}

/*
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long *page_size)
{
	int level;
	u64 *pte;

	*page_size = 0;

	if (address > PM_LEVEL_SIZE(pgtable->mode))
		return NULL;

	level = pgtable->mode - 1;
	pte   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	*page_size = PTE_LEVEL_PAGE_SIZE(level);

	while (level > 0) {

		/* Not Present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Large PTE */
		if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
		    PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
			break;

		/* No level skipping support yet */
		if (PM_PTE_LEVEL(*pte) != level)
			return NULL;

		level -= 1;

		/* Walk to the next level */
		pte = IOMMU_PTE_PAGE(*pte);
		pte = &pte[PM_LEVEL_INDEX(level, address)];
		*page_size = PTE_LEVEL_PAGE_SIZE(level);
	}

	/*
	 * If we have a series of large PTEs, make
	 * sure to return a pointer to the first one.
	 */
	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
		pte = first_pte_l7(pte, page_size, NULL);

	return pte;
}

static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
{
	u64 *pt;
	int mode;

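	/* Atomically clear the PTE; retry (with a warning) if it changes under us. */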
	while (!try_cmpxchg64(pte, &pteval, 0))
		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");

	if (!IOMMU_PTE_PRESENT(pteval))
		return;

	pt   = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space and allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	LIST_HEAD(freelist);
	bool updated = false;
	u64 __pte, *pte;
	int ret, i, count;
	size_t size = pgcount << __ffs(pgsize);
	unsigned long o_iova = iova;

	BUG_ON(!IS_ALIGNED(iova, pgsize));
	BUG_ON(!IS_ALIGNED(paddr, pgsize));

	ret = -EINVAL;
	if (!(prot & IOMMU_PROT_MASK))
		goto out;

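	/* Install one (possibly replicated) PTE group per pgsize chunk. */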
	while (pgcount > 0) {
		count = PAGE_SIZE_PTE_COUNT(pgsize);
		pte   = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);

		ret = -ENOMEM;
		if (!pte)
			goto out;

		for (i = 0; i < count; ++i)
			free_clear_pte(&pte[i], pte[i], &freelist);

		if (!list_empty(&freelist))
			updated = true;

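		/* count > 1 means a large page: encode level 7 and replicate the PTE. */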
		if (count > 1) {
			__pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
			__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
		} else
			__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

		if (prot & IOMMU_PROT_IR)
			__pte |= IOMMU_PTE_IR;
		if (prot & IOMMU_PROT_IW)
			__pte |= IOMMU_PTE_IW;

		for (i = 0; i < count; ++i)
			pte[i] = __pte;

		iova  += pgsize;
		paddr += pgsize;
		pgcount--;
		if (mapped)
			*mapped += pgsize;
	}

	ret = 0;

out:
	if (updated) {
		struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
		unsigned long flags;

		spin_lock_irqsave(&dom->lock, flags);
		/*
		 * Flush domain TLB(s) and wait for completion. Any Device-Table
		 * Updates and flushing already happened in
		 * increase_address_space().
		 */
		amd_iommu_domain_flush_pages(dom, o_iova, size);
		spin_unlock_irqrestore(&dom->lock, flags);
	}

	/* Everything flushed out, free pages now */
	iommu_put_pages_list(&freelist);

	return ret;
}

static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
					  unsigned long iova,
					  size_t pgsize, size_t pgcount,
					  struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long long unmapped;
	unsigned long unmap_size;
	u64 *pte;
	size_t size = pgcount << __ffs(pgsize);

	BUG_ON(!is_power_of_2(pgsize));

	unmapped = 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (pte) {
			int i, count;

			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
				pte[i] = 0ULL;
		} else {
			return unmapped;
		}

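		/* Advance to the next boundary of the size we just unmapped. */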
		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	return unmapped;
}

static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);

	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

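	/* Combine the PTE's frame address with the offset within the page. */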
	offset_mask = pte_pgsize - 1;
	__pte	    = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}

static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
				     unsigned long flags)
{
	bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
	bool dirty = false;
	int i, count;

	/*
	 * 2.2.3.2 Host Dirty Support
	 * When a non-default page size is used, software must OR the
	 * Dirty bits in all of the replicated host PTEs used to map
	 * the page. The IOMMU does not guarantee the Dirty bits are
	 * set in all of the replicated PTEs. Any portion of the page
	 * may have been written even if the Dirty bit is set in only
	 * one of the replicated PTEs.
	 */
	count = PAGE_SIZE_PTE_COUNT(size);
	for (i = 0; i < count && test_only; i++) {
		if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
			dirty = true;
			break;
		}
	}

	for (i = 0; i < count && !test_only; i++) {
		if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
				       (unsigned long *)&ptep[i])) {
			dirty = true;
		}
	}

	return dirty;
}

static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
					 unsigned long iova, size_t size,
					 unsigned long flags,
					 struct iommu_dirty_bitmap *dirty)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long end = iova + size - 1;

	do {
		unsigned long pgsize = 0;
		u64 *ptep, pte;

		ptep = fetch_pte(pgtable, iova, &pgsize);
		if (ptep)
			pte = READ_ONCE(*ptep);
		if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
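			/*
			 * No mapping here: fetch_pte() may leave pgsize at 0,
			 * so fall back to the base page size to step past the
			 * hole.
			 */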
			pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
			iova += pgsize;
			continue;
		}

		/*
		 * Mark the whole IOVA range as dirty even if only one of
		 * the replicated PTEs were marked dirty.
		 */
		if (pte_test_and_clear_dirty(ptep, pgsize, flags))
			iommu_dirty_bitmap_record(dirty, iova, pgsize);
		iova += pgsize;
	} while (iova < end);

	return 0;
}

/*
 * ----------------------------------------------------
 */
static void v1_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
	LIST_HEAD(freelist);

	if (pgtable->mode == PAGE_MODE_NONE)
		return;

	/* Page-table is not visible to IOMMU anymore, so free it */
	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
	       pgtable->mode > PAGE_MODE_6_LEVEL);

	free_sub_pt(pgtable->root, pgtable->mode, &freelist);
	iommu_put_pages_list(&freelist);
}

static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

	pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
	if (!pgtable->root)
		return NULL;
	pgtable->mode = PAGE_MODE_3_LEVEL;

	cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
	cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
	cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;

	pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages;
	pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages;
	pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
	pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;

	return &pgtable->pgtbl;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
	.alloc = v1_alloc_pgtable,
	.free = v1_free_pgtable,
};