xref: /linux/arch/x86/hyperv/mmu.c (revision 8386f58f8deda81110283798a387fb53ec21957c)
1 #define pr_fmt(fmt)  "Hyper-V: " fmt
2 
3 #include <linux/hyperv.h>
4 #include <linux/log2.h>
5 #include <linux/slab.h>
6 #include <linux/types.h>
7 
8 #include <asm/fpu/api.h>
9 #include <asm/mshyperv.h>
10 #include <asm/msr.h>
11 #include <asm/tlbflush.h>
12 #include <asm/tlb.h>
13 
14 #define CREATE_TRACE_POINTS
15 #include <asm/trace/hyperv.h>
16 
17 /* Each gva in gva_list encodes up to 4096 pages to flush */
18 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
19 
20 static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
21 				      const struct flush_tlb_info *info);
22 
23 /*
24  * Fills in gva_list starting from offset. Returns the number of items added.
25  */
26 static inline int fill_gva_list(u64 gva_list[], int offset,
27 				unsigned long start, unsigned long end)
28 {
29 	int gva_n = offset;
30 	unsigned long cur = start, diff;
31 
32 	do {
33 		diff = end > cur ? end - cur : 0;
34 
35 		gva_list[gva_n] = cur & PAGE_MASK;
36 		/*
37 		 * Lower 12 bits encode the number of additional
38 		 * pages to flush (in addition to the 'cur' page).
39 		 */
40 		if (diff >= HV_TLB_FLUSH_UNIT) {
41 			gva_list[gva_n] |= ~PAGE_MASK;
42 			cur += HV_TLB_FLUSH_UNIT;
43 		}  else if (diff) {
44 			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
45 			cur = end;
46 		}
47 
48 		gva_n++;
49 
50 	} while (cur < end);
51 
52 	return gva_n - offset;
53 }
54 
55 static bool cpu_is_lazy(int cpu)
56 {
57 	return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
58 }
59 
60 static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
61 				   const struct flush_tlb_info *info)
62 {
63 	int cpu, vcpu, gva_n, max_gvas;
64 	struct hv_tlb_flush **flush_pcpu;
65 	struct hv_tlb_flush *flush;
66 	u64 status;
67 	unsigned long flags;
68 	bool do_lazy = !info->freed_tables;
69 
70 	trace_hyperv_mmu_flush_tlb_multi(cpus, info);
71 
72 	if (!hv_hypercall_pg)
73 		goto do_native;
74 
75 	local_irq_save(flags);
76 
77 	flush_pcpu = (struct hv_tlb_flush **)
78 		     this_cpu_ptr(hyperv_pcpu_input_arg);
79 
80 	flush = *flush_pcpu;
81 
82 	if (unlikely(!flush)) {
83 		local_irq_restore(flags);
84 		goto do_native;
85 	}
86 
87 	if (info->mm) {
88 		/*
89 		 * AddressSpace argument must match the CR3 with PCID bits
90 		 * stripped out.
91 		 */
92 		flush->address_space = virt_to_phys(info->mm->pgd);
93 		flush->address_space &= CR3_ADDR_MASK;
94 		flush->flags = 0;
95 	} else {
96 		flush->address_space = 0;
97 		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
98 	}
99 
100 	flush->processor_mask = 0;
101 	if (cpumask_equal(cpus, cpu_present_mask)) {
102 		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
103 	} else {
104 		/*
105 		 * From the supplied CPU set we need to figure out if we can get
106 		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
107 		 * hypercalls. This is possible when the highest VP number in
108 		 * the set is < 64. As VP numbers are usually in ascending order
109 		 * and match Linux CPU ids, here is an optimization: we check
110 		 * the VP number for the highest bit in the supplied set first
111 		 * so we can quickly find out if using *_EX hypercalls is a
112 		 * must. We will also check all VP numbers when walking the
113 		 * supplied CPU set to remain correct in all cases.
114 		 */
115 		cpu = cpumask_last(cpus);
116 
117 		if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
118 			goto do_ex_hypercall;
119 
120 		for_each_cpu(cpu, cpus) {
121 			if (do_lazy && cpu_is_lazy(cpu))
122 				continue;
123 			vcpu = hv_cpu_number_to_vp_number(cpu);
124 			if (vcpu == VP_INVAL) {
125 				local_irq_restore(flags);
126 				goto do_native;
127 			}
128 
129 			if (vcpu >= 64)
130 				goto do_ex_hypercall;
131 
132 			__set_bit(vcpu, (unsigned long *)
133 				  &flush->processor_mask);
134 		}
135 
136 		/* nothing to flush if 'processor_mask' ends up being empty */
137 		if (!flush->processor_mask) {
138 			local_irq_restore(flags);
139 			return;
140 		}
141 	}
142 
143 	/*
144 	 * We can flush not more than max_gvas with one hypercall. Flush the
145 	 * whole address space if we were asked to do more.
146 	 */
147 	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
148 
149 	if (info->end == TLB_FLUSH_ALL) {
150 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
151 		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
152 					 flush, NULL);
153 	} else if (info->end &&
154 		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
155 		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
156 					 flush, NULL);
157 	} else {
158 		gva_n = fill_gva_list(flush->gva_list, 0,
159 				      info->start, info->end);
160 		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
161 					     gva_n, 0, flush, NULL);
162 	}
163 	goto check_status;
164 
165 do_ex_hypercall:
166 	status = hyperv_flush_tlb_others_ex(cpus, info);
167 
168 check_status:
169 	local_irq_restore(flags);
170 
171 	if (hv_result_success(status))
172 		return;
173 do_native:
174 	native_flush_tlb_multi(cpus, info);
175 }
176 
177 static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
178 				      const struct flush_tlb_info *info)
179 {
180 	int nr_bank = 0, max_gvas, gva_n;
181 	struct hv_tlb_flush_ex **flush_pcpu;
182 	struct hv_tlb_flush_ex *flush;
183 	u64 status;
184 
185 	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
186 		return HV_STATUS_INVALID_PARAMETER;
187 
188 	flush_pcpu = (struct hv_tlb_flush_ex **)
189 		     this_cpu_ptr(hyperv_pcpu_input_arg);
190 
191 	flush = *flush_pcpu;
192 
193 	if (info->mm) {
194 		/*
195 		 * AddressSpace argument must match the CR3 with PCID bits
196 		 * stripped out.
197 		 */
198 		flush->address_space = virt_to_phys(info->mm->pgd);
199 		flush->address_space &= CR3_ADDR_MASK;
200 		flush->flags = 0;
201 	} else {
202 		flush->address_space = 0;
203 		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
204 	}
205 
206 	flush->hv_vp_set.valid_bank_mask = 0;
207 
208 	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
209 	nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
210 			info->freed_tables ? NULL : cpu_is_lazy);
211 	if (nr_bank < 0)
212 		return HV_STATUS_INVALID_PARAMETER;
213 
214 	/*
215 	 * We can flush not more than max_gvas with one hypercall. Flush the
216 	 * whole address space if we were asked to do more.
217 	 */
218 	max_gvas =
219 		(PAGE_SIZE - sizeof(*flush) - nr_bank *
220 		 sizeof(flush->hv_vp_set.bank_contents[0])) /
221 		sizeof(flush->gva_list[0]);
222 
223 	if (info->end == TLB_FLUSH_ALL) {
224 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
225 		status = hv_do_rep_hypercall(
226 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
227 			0, nr_bank, flush, NULL);
228 	} else if (info->end &&
229 		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
230 		status = hv_do_rep_hypercall(
231 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
232 			0, nr_bank, flush, NULL);
233 	} else {
234 		gva_n = fill_gva_list(flush->gva_list, nr_bank,
235 				      info->start, info->end);
236 		status = hv_do_rep_hypercall(
237 			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
238 			gva_n, nr_bank, flush, NULL);
239 	}
240 
241 	return status;
242 }
243 
244 void hyperv_setup_mmu_ops(void)
245 {
246 	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
247 		return;
248 
249 	pr_info("Using hypercall for remote TLB flush\n");
250 	pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
251 	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
252 }
253