GNU Linux-libre 4.9.309-gnu1
drivers/hv/hv.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
#include <linux/interrupt.h>
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
#include <asm/nospec-branch.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context = {
	.synic_initialized	= false,
	.hypercall_page		= NULL,
};

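/*
 * Parameters for the Hyper-V synthetic timer clockevents: the partition
 * reference time runs at 10MHz (one tick per 100ns), and these bounds are
 * passed to clockevents_config_and_register() in hv_synic_init().
 */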
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1

/*
 * query_hypervisor_info - Get version info of the Windows hypervisor.
 */
unsigned int host_info_eax;
unsigned int host_info_ebx;
unsigned int host_info_ecx;
unsigned int host_info_edx;

static int query_hypervisor_info(void)
{
	unsigned int eax;
	unsigned int ebx;
	unsigned int ecx;
	unsigned int edx;
	unsigned int max_leaf;
	unsigned int op;

	/*
	 * It's assumed that this is called after confirming that Viridian
	 * is present. Query id and revision.
	 */
	eax = 0;
	ebx = 0;
	ecx = 0;
	edx = 0;
	op = HVCPUID_VENDOR_MAXFUNCTION;
	cpuid(op, &eax, &ebx, &ecx, &edx);

	max_leaf = eax;

	if (max_leaf >= HVCPUID_VERSION) {
		eax = 0;
		ebx = 0;
		ecx = 0;
		edx = 0;
		op = HVCPUID_VERSION;
		cpuid(op, &eax, &ebx, &ecx, &edx);
		host_info_eax = eax;
		host_info_ebx = ebx;
		host_info_ecx = ecx;
		host_info_edx = edx;
	}
	return max_leaf;
}

/*
 * hv_do_hypercall - Invoke the specified hypercall.
 */
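/*
 * Register usage follows the Hyper-V hypercall ABI, as encoded in the asm
 * constraints below: on x86_64 the control word goes in RCX, the input
 * page GPA in RDX and the output page GPA in R8, with the status returned
 * in RAX; on 32-bit x86 each 64-bit value is split across a register pair
 * (EDX:EAX, EBX:ECX, EDI:ESI). CALL_NOSPEC/THUNK_TARGET make the indirect
 * call into the hypercall page retpoline-safe.
 */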
u64 hv_do_hypercall(u64 control, void *input, void *output)
{
	u64 input_address = (input) ? virt_to_phys(input) : 0;
	u64 output_address = (output) ? virt_to_phys(output) : 0;
	void *hypercall_page = hv_context.hypercall_page;
#ifdef CONFIG_X86_64
	u64 hv_status = 0;

	if (!hypercall_page)
		return (u64)ULLONG_MAX;

	__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
	__asm__ __volatile__(CALL_NOSPEC :
			     "=a" (hv_status) :
			     "c" (control), "d" (input_address),
			     THUNK_TARGET(hypercall_page));

	return hv_status;

#else

	u32 control_hi = control >> 32;
	u32 control_lo = control & 0xFFFFFFFF;
	u32 hv_status_hi = 1;
	u32 hv_status_lo = 1;
	u32 input_address_hi = input_address >> 32;
	u32 input_address_lo = input_address & 0xFFFFFFFF;
	u32 output_address_hi = output_address >> 32;
	u32 output_address_lo = output_address & 0xFFFFFFFF;

	if (!hypercall_page)
		return (u64)ULLONG_MAX;

	__asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
			      "=a"(hv_status_lo) : "d" (control_hi),
			      "a" (control_lo), "b" (input_address_hi),
			      "c" (input_address_lo), "D"(output_address_hi),
			      "S"(output_address_lo),
			      THUNK_TARGET(hypercall_page));

	return hv_status_lo | ((u64)hv_status_hi << 32);
#endif /* !x86_64 */
}
EXPORT_SYMBOL_GPL(hv_do_hypercall);

#ifdef CONFIG_X86_64
static cycle_t read_hv_clock_tsc(struct clocksource *arg)
{
	cycle_t current_tick;
	struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;

	if (tsc_pg->tsc_sequence != 0) {
		/*
		 * Use the tsc page to compute the value.
		 */

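		/*
		 * Seqcount-style read: sample tsc_sequence, compute
		 * ((cur_tsc * scale) >> 64) + offset, then retry if the
		 * hypervisor changed the sequence in the meantime. A
		 * sequence of 0 means the page is invalid and the
		 * TIME_REF_COUNT MSR must be used instead.
		 */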
		while (1) {
			cycle_t tmp;
			u32 sequence = tsc_pg->tsc_sequence;
			u64 cur_tsc;
			u64 scale = tsc_pg->tsc_scale;
			s64 offset = tsc_pg->tsc_offset;

			rdtscll(cur_tsc);
			/* current_tick = ((cur_tsc * scale) >> 64) + offset */
			asm("mulq %3"
				: "=d" (current_tick), "=a" (tmp)
				: "a" (cur_tsc), "r" (scale));

			current_tick += offset;
			if (tsc_pg->tsc_sequence == sequence)
				return current_tick;

			if (tsc_pg->tsc_sequence != 0)
				continue;
			/*
			 * Fallback using MSR method.
			 */
			break;
		}
	}
	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	return current_tick;
}

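/*
 * Rating 425 sits in the clocksource "perfect, platform specific" band
 * (400-499), so this TSC-page clocksource is preferred over the raw TSC
 * once registered.
 */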
static struct clocksource hyperv_cs_tsc = {
	.name		= "hyperv_clocksource_tsc_page",
	.rating		= 425,
	.read		= read_hv_clock_tsc,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};
#endif

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called.
 */
int hv_init(void)
{
	int max_leaf;
	union hv_x64_msr_hypercall_contents hypercall_msr;
	void *virtaddr = NULL;

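	/* Clear the per-cpu context arrays before any CPU is brought up. */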
	memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
	memset(hv_context.synic_message_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.post_msg_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.vp_index, 0,
	       sizeof(int) * NR_CPUS);
	memset(hv_context.event_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.msg_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.clk_evt, 0,
	       sizeof(void *) * NR_CPUS);

	max_leaf = query_hypervisor_info();

	/*
	 * Write our OS ID.
	 */
	hv_context.guestid = generate_guest_id(0, LINUX_VERSION_CODE, 0);
	wrmsrl(HV_X64_MSR_GUEST_OS_ID, hv_context.guestid);

	/* See if the hypercall page is already set */
	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

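	/*
	 * The hypervisor patches a hypercall trampoline into this page, so
	 * it must be mapped executable (PAGE_KERNEL_RX); its PFN is handed
	 * to the hypervisor through the hypercall MSR below.
	 */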
	virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);

	if (!virtaddr)
		goto cleanup;

	hypercall_msr.enable = 1;

	hypercall_msr.guest_physical_address = vmalloc_to_pfn(virtaddr);
	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

	/* Confirm that the hypercall page did get set up. */
	hypercall_msr.as_uint64 = 0;
	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

	if (!hypercall_msr.enable)
		goto cleanup;

	hv_context.hypercall_page = virtaddr;

#ifdef CONFIG_X86_64
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		union hv_x64_msr_hypercall_contents tsc_msr;
		void *va_tsc;

		va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
		if (!va_tsc)
			goto cleanup;
		hv_context.tsc_page = va_tsc;

		rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);

		tsc_msr.enable = 1;
		tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc);

		wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
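		/* The partition reference counter ticks at 10MHz (100ns). */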
		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
	}
#endif
	return 0;

cleanup:
	if (virtaddr) {
		if (hypercall_msr.enable) {
			hypercall_msr.as_uint64 = 0;
			wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
		}

		vfree(virtaddr);
	}

	return -ENOTSUPP;
}

/*
 * hv_cleanup - Cleanup routine.
 *
 * This routine is normally called during driver unloading or exiting.
 */
void hv_cleanup(bool crash)
{
	union hv_x64_msr_hypercall_contents hypercall_msr;

	/* Reset our OS id */
	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);

	if (hv_context.hypercall_page) {
		hypercall_msr.as_uint64 = 0;
		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
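		/*
		 * In the crash path we may be running in interrupt context,
		 * where vfree() is not safe; the page is deliberately left
		 * allocated there.
		 */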
		if (!crash)
			vfree(hv_context.hypercall_page);
		hv_context.hypercall_page = NULL;
	}

#ifdef CONFIG_X86_64
	/*
	 * Clean up the TSC-page-based clocksource.
	 */
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		/*
		 * Crash can happen in an interrupt context and unregistering
		 * a clocksource is impossible and redundant in this case.
		 */
		if (!oops_in_progress) {
			clocksource_change_rating(&hyperv_cs_tsc, 10);
			clocksource_unregister(&hyperv_cs_tsc);
		}

		hypercall_msr.as_uint64 = 0;
		wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
		if (!crash) {
			vfree(hv_context.tsc_page);
			hv_context.tsc_page = NULL;
		}
	}
#endif
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		  enum hv_message_type message_type,
		  void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

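	/*
	 * Hypercall input must not cross a page boundary, so use this cpu's
	 * pre-allocated, page-aligned post_msg_page. get_cpu() disables
	 * preemption so the page cannot be reused underneath us.
	 */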
	aligned_msg = (struct hv_input_post_message *)
			hv_context.post_msg_page[get_cpu()];

	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);

	put_cpu();
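	/* The result code is in the low 16 bits of the hypercall status. */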
	return status & 0xFFFF;
}

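/*
 * Program STIMER0 with an absolute expiry: the current partition reference
 * count (in 100ns units) plus the requested delta.
 */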
static int hv_ce_set_next_event(unsigned long delta,
				struct clock_event_device *evt)
{
	cycle_t current_tick;

	WARN_ON(!clockevent_state_oneshot(evt));

	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	current_tick += delta;
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
	return 0;
}

static int hv_ce_shutdown(struct clock_event_device *evt)
{
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);

	return 0;
}

static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
	union hv_timer_config timer_cfg;

	/* Zero the whole union first; it starts out as stack garbage. */
	timer_cfg.as_uint64 = 0;
	timer_cfg.enable = 1;
	timer_cfg.auto_enable = 1;
	timer_cfg.sintx = VMBUS_MESSAGE_SINT;
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);

	return 0;
}

static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
{
	dev->name = "Hyper-V clockevent";
	dev->features = CLOCK_EVT_FEAT_ONESHOT;
	dev->cpumask = cpumask_of(cpu);
	dev->rating = 1000;
	/*
	 * We deliberately avoid setting dev->owner = THIS_MODULE, as doing
	 * so would make clockevents_config_and_register() take additional
	 * references to the hv_vmbus module, making it impossible to unload.
	 */

	dev->set_state_shutdown = hv_ce_shutdown;
	dev->set_state_oneshot = hv_ce_set_oneshot;
	dev->set_next_event = hv_ce_set_next_event;
}

int hv_synic_alloc(void)
{
	size_t size = sizeof(struct tasklet_struct);
	size_t ced_size = sizeof(struct clock_event_device);
	int cpu;

	hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
					 GFP_ATOMIC);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.event_dpc[cpu] == NULL) {
			pr_err("Unable to allocate event dpc\n");
			goto err;
		}
		tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);

		hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.msg_dpc[cpu] == NULL) {
			pr_err("Unable to allocate msg dpc\n");
			goto err;
		}
		tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);

		hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
		if (hv_context.clk_evt[cpu] == NULL) {
			pr_err("Unable to allocate clock event device\n");
			goto err;
		}

		hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);

		hv_context.synic_message_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_message_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC message page\n");
			goto err;
		}

		hv_context.synic_event_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_event_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC event page\n");
			goto err;
		}

		hv_context.post_msg_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.post_msg_page[cpu] == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}

		INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
	}

	return 0;
err:
	return -ENOMEM;
}

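/*
 * Undo hv_synic_alloc() for one cpu. kfree() tolerates NULL, so only the
 * page pointers need explicit checks.
 */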
static void hv_synic_free_cpu(int cpu)
{
	kfree(hv_context.event_dpc[cpu]);
	kfree(hv_context.msg_dpc[cpu]);
	kfree(hv_context.clk_evt[cpu]);
	if (hv_context.synic_event_page[cpu])
		free_page((unsigned long)hv_context.synic_event_page[cpu]);
	if (hv_context.synic_message_page[cpu])
		free_page((unsigned long)hv_context.synic_message_page[cpu]);
	if (hv_context.post_msg_page[cpu])
		free_page((unsigned long)hv_context.post_msg_page[cpu]);
}

void hv_synic_free(void)
{
	int cpu;

	kfree(hv_context.hv_numa_map);
	for_each_present_cpu(cpu)
		hv_synic_free_cpu(cpu);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e. the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_init(void *arg)
{
	u64 version;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;
	u64 vp_index;

	int cpu = smp_processor_id();

	if (!hv_context.hypercall_page)
		return;

	/* Check the SynIC version (read but currently unused) */
	rdmsrl(HV_X64_MSR_SVERSION, version);

	/* Setup the Synic's message page */
	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 1;
	simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 1;
	siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

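	/*
	 * Deliver VMBus messages and events through
	 * HYPERVISOR_CALLBACK_VECTOR; auto_eoi lets the hypervisor perform
	 * the EOI on our behalf.
	 */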
	shared_sint.as_uint64 = 0;
	shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	shared_sint.masked = false;
	shared_sint.auto_eoi = true;

	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	/* Enable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 1;

	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);

	hv_context.synic_initialized = true;

	/*
	 * Set up the mapping between Hyper-V's notion of cpuid (the VP
	 * index) and Linux's notion of cpuid. This array will be indexed
	 * using the Linux cpuid.
	 */
	rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
	hv_context.vp_index[cpu] = (u32)vp_index;

	/*
	 * Register the per-cpu clockevent source.
	 */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
		clockevents_config_and_register(hv_context.clk_evt[cpu],
						HV_TIMER_FREQUENCY,
						HV_MIN_DELTA_TICKS,
						HV_MAX_MAX_DELTA_TICKS);
}

/*
 * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 */
void hv_synic_clockevents_cleanup(void)
{
	int cpu;

	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
		return;

	for_each_online_cpu(cpu)
		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_cleanup(void *arg)
{
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;
	int cpu = smp_processor_id();

	if (!hv_context.synic_initialized)
		return;

	/* Turn off the clockevent device */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
		hv_ce_shutdown(hv_context.clk_evt[cpu]);

	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	shared_sint.masked = 1;

	/* TODO: Correctly clean up in the SMP case. */
	/* Disable the interrupt */
	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 0;
	simp.base_simp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 0;
	siefp.base_siefp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 0;
	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
}
628 }