GNU Linux-libre 4.14.290-gnu1
arch/x86/hyperv/mmu.c
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

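/*
 * Defining CREATE_TRACE_POINTS before including the trace header emits the
 * tracepoint definitions (trace_hyperv_mmu_flush_tlb_others() used below) in
 * this translation unit; it must be defined in exactly one compilation unit
 * per trace header.
 */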
#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};
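/*
 * Note: bank_contents[] and gva_list[] are both flexible arrays, so they
 * start at the same offset right after valid_bank_mask.  The first nr_bank
 * entries hold the bank contents and the GVA list follows them, which is
 * why fill_gva_list() is called with an offset of nr_bank for the Ex
 * hypercalls below.
 */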

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
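/*
 * For example, with 4 KiB pages this is 4096 * 4096 bytes = 16 MiB, i.e. a
 * single gva_list entry can describe at most 16 MiB of virtual address space.
 */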

static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT) {
                        gva_list[gva_n] |= ~PAGE_MASK;
                        cur += HV_TLB_FLUSH_UNIT;
                } else if (diff) {
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
                        cur = end;
                }

                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}
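/*
 * Worked example (4 KiB pages): flushing [start, start + 5 pages) emits one
 * entry -- the page-aligned start with 4 in the low bits (4 additional
 * pages).  A range of 2 * HV_TLB_FLUSH_UNIT emits two entries, each with all
 * twelve low bits set (4095 additional pages per entry).
 */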

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /* valid_bank_mask can represent up to 64 banks */
        if (hv_max_vp_index / 64 >= 64)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank; hv_flush_pcpu_ex
         * structs are not cleared between calls, so we would otherwise risk
         * flushing unneeded vCPUs.
         */
        for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
                flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

        /* Some banks may end up being empty but this is acceptable. */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;
                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}
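/*
 * Worked example: if the mask contains CPUs whose VP indices are 5 and 70,
 * bit 5 of bank_contents[0] and bit 6 of bank_contents[1] get set, nr_bank
 * becomes 2 and valid_bank_mask ends up as 0x3 (banks 0 and 1 valid).
 */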

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu **flush_pcpu;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush);

        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        /*
                         * processor_mask is a single 64-bit field, so vCPUs
                         * above 63 cannot be encoded; re-enable interrupts
                         * and fall back to the native flush.
                         */
                        if (vcpu >= 64) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We can flush at most max_gvas gva_list entries with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
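        /*
         * E.g. with 4 KiB pages and the 24-byte fixed header above this is
         * (4096 - 24) / 8 = 509 gva_list entries per hypercall page.
         */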

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex **flush_pcpu;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }
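        /*
         * nr_bank is still 0 either when all present CPUs are targeted or
         * when cpumask_to_vp_set() bailed out because hv_max_vp_index needs
         * more than 64 banks; both cases flush all processors instead.
         */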

        /*
         * We can flush at most max_gvas gva_list entries with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);
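        /*
         * E.g. with 4 KiB pages, the 32-byte fixed header and a single bank
         * this is (4096 - 32 - 8) / 8 = 507 gva_list entries per hypercall.
         */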

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

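/*
 * Allocate the per-CPU pointer used by the flush routines above.  Only the
 * pointer itself is allocated here; the actual hypercall input page is
 * allocated lazily (GFP_ATOMIC) on a CPU's first remote flush.  This is
 * expected to be called once during Hyper-V initialization.
 */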
void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
        else
                pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}