xref: /linux/arch/x86/hyperv/mmu.c (revision 74ce1896c6c65b2f8cccbf59162d542988835835)
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
	u64 address_space;
	u64 flags;
	u64 processor_mask;
	u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
	u64 address_space;
	u64 flags;
	struct {
		u64 format;
		u64 valid_bank_mask;
		u64 bank_contents[];
	} hv_vp_set;
	u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

static struct hv_flush_pcpu __percpu *pcpu_flush;

static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
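
/*
 * pcpu_flush and pcpu_flush_ex each point to one PAGE_SIZE hypercall input
 * buffer per CPU (allocated in hyper_alloc_mmu() below).  The buffers are
 * only filled with interrupts disabled, so the flush path cannot race with
 * itself on the same CPU while a hypercall argument is being built.
 */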

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}
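
/*
 * Example of the encoding above: flushing [start, start + 3 * PAGE_SIZE)
 * with a page-aligned 'start' produces a single entry
 * (start & PAGE_MASK) | 2, i.e. the page containing 'start' plus two
 * additional pages.  A range of HV_TLB_FLUSH_UNIT or more sets all twelve
 * low bits and the remainder spills into the next gva_list entry.
 */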

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
				    const struct cpumask *cpus)
{
	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

	/*
	 * The per-cpu flush structure is reused across calls, so clear out
	 * the banks first: stale bits from a previous flush would otherwise
	 * flush unneeded vCPUs.
	 */
	for (vcpu_bank = 0; vcpu_bank < 64; vcpu_bank++)
		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

	/*
	 * Some banks may end up being empty but this is acceptable.
	 */
	for_each_cpu(cpu, cpus) {
		vcpu = hv_cpu_number_to_vp_number(cpu);
		vcpu_bank = vcpu / 64;
		vcpu_offset = vcpu % 64;

		/* valid_bank_mask can represent up to 64 banks */
		if (vcpu_bank >= 64)
			return 0;

		__set_bit(vcpu_offset, (unsigned long *)
			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
		if (vcpu_bank >= nr_bank)
			nr_bank = vcpu_bank + 1;
	}
	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

	return nr_bank;
}
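
/*
 * Example: for vCPUs 1 and 70 the loop above sets bit 1 in bank 0 and
 * bit 6 in bank 1, so nr_bank is 2 and valid_bank_mask ends up as 0x3.
 */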

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_flush_pcpu *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush = this_cpu_ptr(pcpu_flush);

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu >= 64) {
				/*
				 * processor_mask only covers vCPUs 0..63;
				 * restore interrupts before falling back to
				 * the native path.
				 */
				local_irq_restore(flags);
				goto do_native;
			}

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We cannot flush more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
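	/*
	 * On x86 PAGE_SIZE is 4096 and sizeof(*flush) is 24, so max_gvas is
	 * (4096 - 24) / 8 = 509 entries; each entry covers at most
	 * HV_TLB_FLUSH_UNIT (16 MiB), i.e. roughly 8 GiB per list hypercall.
	 */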

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				       const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_flush_pcpu_ex *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush_ex || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush = this_cpu_ptr(pcpu_flush_ex);

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	if (!cpumask_equal(cpus, cpu_present_mask)) {
		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
		nr_bank = cpumask_to_vp_set(flush, cpus);
	}

	if (!nr_bank) {
		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	}

	/*
	 * We cannot flush more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
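	/*
	 * bank_contents[] and gva_list[] overlap in the declared layout, so
	 * the actual GVA entries are written starting at gva_list[nr_bank];
	 * that is why nr_bank banks are subtracted from the space available
	 * here and why fill_gva_list() below is called with offset nr_bank.
	 */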

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank + 2, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank + 2, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank + 2, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
		pr_info("Using hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
	} else {
		pr_info("Using ext hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
	}
}
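
/*
 * The hypercall page and the per-cpu flush buffers are set up separately
 * from the pv_mmu_ops hook (see hyper_alloc_mmu() below).  Until both
 * exist, the !hv_hypercall_pg and !pcpu_flush/!pcpu_flush_ex checks in the
 * flush functions above simply fall back to native_flush_tlb_others().
 */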

void hyper_alloc_mmu(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
	else
		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
}