GNU Linux-libre 4.14.290-gnu1
arch/x86/hyperv/mmu.c
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

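/*
 * Defining CREATE_TRACE_POINTS before including the trace header emits the
 * tracepoint definitions (trace_hyperv_mmu_flush_tlb_others() used below) in
 * this translation unit; it must be defined in exactly one compilation unit
 * per trace header.
 */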
#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};
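/*
 * Note: bank_contents[] and gva_list[] are both flexible arrays, so they
 * start at the same offset right after valid_bank_mask.  The first nr_bank
 * entries hold the bank contents and the GVA list follows them, which is
 * why fill_gva_list() is called with an offset of nr_bank for the Ex
 * hypercalls below.
 */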

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
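/*
 * For example, with 4 KiB pages this is 4096 * 4096 bytes = 16 MiB, i.e. a
 * single gva_list entry can describe at most 16 MiB of virtual address space.
 */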

static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT) {
                        gva_list[gva_n] |= ~PAGE_MASK;
                        cur += HV_TLB_FLUSH_UNIT;
                } else if (diff) {
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
                        cur = end;
                }

                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}
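/*
 * Worked example (4 KiB pages): flushing [start, start + 5 pages) emits one
 * entry -- the page-aligned start with 4 in the low bits (4 additional
 * pages).  A range of 2 * HV_TLB_FLUSH_UNIT emits two entries, each with all
 * twelve low bits set (4095 additional pages per entry).
 */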

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /* valid_bank_mask can represent up to 64 banks */
        if (hv_max_vp_index / 64 >= 64)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank; hv_flush_pcpu_ex
         * structs are not cleared between calls, so we would otherwise risk
         * flushing unneeded vCPUs.
         */
        for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
                flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

        /* Some banks may end up being empty but this is acceptable. */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;
                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}
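/*
 * Worked example: if the mask contains CPUs whose VP indices are 5 and 70,
 * bit 5 of bank_contents[0] and bit 6 of bank_contents[1] get set, nr_bank
 * becomes 2 and valid_bank_mask ends up as 0x3 (banks 0 and 1 valid).
 */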

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu **flush_pcpu;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush);

        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        /*
                         * processor_mask is a single 64-bit field, so vCPUs
                         * above 63 cannot be encoded; re-enable interrupts
                         * and fall back to the native flush.
                         */
                        if (vcpu >= 64) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We can flush at most max_gvas gva_list entries with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
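        /*
         * E.g. with 4 KiB pages and the 24-byte fixed header above this is
         * (4096 - 24) / 8 = 509 gva_list entries per hypercall page.
         */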

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex **flush_pcpu;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }
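        /*
         * nr_bank is still 0 either when all present CPUs are targeted or
         * when cpumask_to_vp_set() bailed out because hv_max_vp_index needs
         * more than 64 banks; both cases flush all processors instead.
         */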

        /*
         * We can flush at most max_gvas gva_list entries with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);
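        /*
         * E.g. with 4 KiB pages, the 32-byte fixed header and a single bank
         * this is (4096 - 32 - 8) / 8 = 507 gva_list entries per hypercall.
         */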

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

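/*
 * Allocate the per-CPU pointer used by the flush routines above.  Only the
 * pointer itself is allocated here; the actual hypercall input page is
 * allocated lazily (GFP_ATOMIC) on a CPU's first remote flush.  This is
 * expected to be called once during Hyper-V initialization.
 */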
void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
        else
                pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}