/*
 * IBM System z Huge TLB Page Support for Kernel.
 *
 * Copyright IBM Corp. 2007,2016
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))

static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long rste;

	/*
	 * Convert encoding		pte bits	pmd / pud bits
	 *				lIR.uswrdy.p	dy..R...I...wr
	 * empty			010.000000.0 -> 00..0...1...00
	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (pte_present(pte)) {
		rste = pte_val(pte) & PAGE_MASK;
		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
				     _SEGMENT_ENTRY_READ);
		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
				     _SEGMENT_ENTRY_WRITE);
		rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
				     _SEGMENT_ENTRY_INVALID);
		rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
				     _SEGMENT_ENTRY_PROTECT);
		rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
				     _SEGMENT_ENTRY_DIRTY);
		rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
				     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
				     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
	} else
		rste = _SEGMENT_ENTRY_INVALID;
	return rste;
}

static inline pte_t __rste_to_pte(unsigned long rste)
{
	int present;
	pte_t pte;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		present = pud_present(__pud(rste));
	else
		present = pmd_present(__pmd(rste));

	/*
	 * Convert encoding		pmd / pud bits	pte bits
	 *				dy..R...I...wr	lIR.uswrdy.p
	 * empty			00..0...1...00 -> 010.000000.0
	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
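	/*
	 * For present entries, copy the page-frame origin and move each
	 * hardware/software bit back to its pte position (the inverse of
	 * __pte_to_rste() above); non-present entries become an invalid pte.
	 */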
	if (present) {
		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
					     _PAGE_READ);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
					     _PAGE_WRITE);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
					     _PAGE_INVALID);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
					     _PAGE_PROTECT);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
					     _PAGE_DIRTY);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
					     _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		/* Soft-dirty must map back to _PAGE_SOFT_DIRTY, not _PAGE_DIRTY */
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
					     _PAGE_SOFT_DIRTY);
#endif
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	unsigned long rste = __pte_to_rste(pte);

	/* Set correct table type for 2G hugepages */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
	else
		rste |= _SEGMENT_ENTRY_LARGE;
	pte_val(*ptep) = rste;
}

pte_t huge_ptep_get(pte_t *ptep)
{
	return __rste_to_pte(pte_val(*ptep));
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte = huge_ptep_get(ptep);
	pmd_t *pmdp = (pmd_t *) ptep;
	pud_t *pudp = (pud_t *) ptep;

	/* Clear the entry with the exchange primitive matching its level */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
	else
		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
	return pte;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	pudp = pud_alloc(mm, pgdp, addr);
	if (pudp) {
		if (sz == PUD_SIZE)
			return (pte_t *) pudp;
		else if (sz == PMD_SIZE)
			pmdp = pmd_alloc(mm, pudp, addr);
	}
	return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, addr);
		if (pud_present(*pudp)) {
			if (pud_large(*pudp))
				return (pte_t *) pudp;
			pmdp = pmd_offset(pudp, addr);
		}
	}
	return (pte_t *) pmdp;
}

int pmd_huge(pmd_t pmd)
{
	return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
	return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int flags)
{
	if (flags & FOLL_GET)
		return NULL;

	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

static __init int setup_hugepagesz(char *opt)
{
	unsigned long size;
	char *string = opt;

	size = memparse(opt, &opt);
	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		hugetlb_bad_size();
		pr_err("hugepagesz= specifies an unsupported page size %s\n",
		       string);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
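
/*
 * Usage sketch (illustrative, not part of the upstream file): on s390,
 * PMD_SIZE is 1 MB (a segment, available with EDAT1) and PUD_SIZE is 2 GB
 * (a region-third, available with EDAT2), so the sizes accepted by
 * setup_hugepagesz() are selected on the kernel command line, e.g.:
 *
 *	hugepagesz=1M hugepages=128
 *	hugepagesz=2G hugepages=2
 *
 * Any other size makes setup_hugepagesz() report it as unsupported.
 */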