GNU Linux-libre 4.4.288-gnu1
[releases.git] / drivers / hv / vmbus_drv.c
1 /*
2  * Copyright (c) 2009, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Authors:
18  *   Haiyang Zhang <haiyangz@microsoft.com>
19  *   Hank Janssen  <hjanssen@microsoft.com>
20  *   K. Y. Srinivasan <kys@microsoft.com>
21  *
22  */
23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24
25 #include <linux/init.h>
26 #include <linux/module.h>
27 #include <linux/device.h>
28 #include <linux/interrupt.h>
29 #include <linux/sysctl.h>
30 #include <linux/slab.h>
31 #include <linux/acpi.h>
32 #include <linux/completion.h>
33 #include <linux/hyperv.h>
34 #include <linux/kernel_stat.h>
35 #include <linux/clockchips.h>
36 #include <linux/cpu.h>
37 #include <asm/hyperv.h>
38 #include <asm/hypervisor.h>
39 #include <asm/mshyperv.h>
40 #include <linux/notifier.h>
41 #include <linux/ptrace.h>
42 #include <linux/screen_info.h>
43 #include <linux/kdebug.h>
44 #include <linux/random.h>
45 #include "hyperv_vmbus.h"
46
47 static struct acpi_device  *hv_acpi_dev;
48
49 static struct tasklet_struct msg_dpc;
50 static struct completion probe_event;
51 static int irq;
52
53
54 static void hyperv_report_panic(struct pt_regs *regs)
55 {
56         static bool panic_reported;
57
58         /*
59          * We prefer to report panic on 'die' chain as we have proper
60          * registers to report, but if we miss it (e.g. on BUG()) we need
61          * to report it on 'panic'.
62          */
63         if (panic_reported)
64                 return;
65         panic_reported = true;
66
67         wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
68         wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
69         wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
70         wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
71         wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);
72
73         /*
74          * Let Hyper-V know there is crash data available
75          */
76         wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
77 }
78
79 static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
80                               void *args)
81 {
82         struct pt_regs *regs;
83
84         regs = current_pt_regs();
85
86         hyperv_report_panic(regs);
87         return NOTIFY_DONE;
88 }
89
90 static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
91                             void *args)
92 {
93         struct die_args *die = (struct die_args *)args;
94         struct pt_regs *regs = die->regs;
95
96         hyperv_report_panic(regs);
97         return NOTIFY_DONE;
98 }
99
100 static struct notifier_block hyperv_die_block = {
101         .notifier_call = hyperv_die_event,
102 };
103 static struct notifier_block hyperv_panic_block = {
104         .notifier_call = hyperv_panic_event,
105 };
106
/*
 * NOTE(review): presumably hyperv_mmio_lock serialises access to
 * hyperv_mmio; neither is used in this part of the file -- confirm
 * against their users.
 */
struct resource *hyperv_mmio;
DEFINE_SEMAPHORE(hyperv_mmio_lock);
109
110 static int vmbus_exists(void)
111 {
112         if (hv_acpi_dev == NULL)
113                 return -ENODEV;
114
115         return 0;
116 }
117
118 #define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
119 static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
120 {
121         int i;
122         for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
123                 sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
124 }
125
126 static u8 channel_monitor_group(struct vmbus_channel *channel)
127 {
128         return (u8)channel->offermsg.monitorid / 32;
129 }
130
131 static u8 channel_monitor_offset(struct vmbus_channel *channel)
132 {
133         return (u8)channel->offermsg.monitorid % 32;
134 }
135
136 static u32 channel_pending(struct vmbus_channel *channel,
137                            struct hv_monitor_page *monitor_page)
138 {
139         u8 monitor_group = channel_monitor_group(channel);
140         return monitor_page->trigger_group[monitor_group].pending;
141 }
142
143 static u32 channel_latency(struct vmbus_channel *channel,
144                            struct hv_monitor_page *monitor_page)
145 {
146         u8 monitor_group = channel_monitor_group(channel);
147         u8 monitor_offset = channel_monitor_offset(channel);
148         return monitor_page->latency[monitor_group][monitor_offset];
149 }
150
151 static u32 channel_conn_id(struct vmbus_channel *channel,
152                            struct hv_monitor_page *monitor_page)
153 {
154         u8 monitor_group = channel_monitor_group(channel);
155         u8 monitor_offset = channel_monitor_offset(channel);
156         return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
157 }
158
159 static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
160                        char *buf)
161 {
162         struct hv_device *hv_dev = device_to_hv_device(dev);
163
164         if (!hv_dev->channel)
165                 return -ENODEV;
166         return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
167 }
168 static DEVICE_ATTR_RO(id);
169
170 static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
171                           char *buf)
172 {
173         struct hv_device *hv_dev = device_to_hv_device(dev);
174
175         if (!hv_dev->channel)
176                 return -ENODEV;
177         return sprintf(buf, "%d\n", hv_dev->channel->state);
178 }
179 static DEVICE_ATTR_RO(state);
180
181 static ssize_t monitor_id_show(struct device *dev,
182                                struct device_attribute *dev_attr, char *buf)
183 {
184         struct hv_device *hv_dev = device_to_hv_device(dev);
185
186         if (!hv_dev->channel)
187                 return -ENODEV;
188         return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
189 }
190 static DEVICE_ATTR_RO(monitor_id);
191
192 static ssize_t class_id_show(struct device *dev,
193                                struct device_attribute *dev_attr, char *buf)
194 {
195         struct hv_device *hv_dev = device_to_hv_device(dev);
196
197         if (!hv_dev->channel)
198                 return -ENODEV;
199         return sprintf(buf, "{%pUl}\n",
200                        hv_dev->channel->offermsg.offer.if_type.b);
201 }
202 static DEVICE_ATTR_RO(class_id);
203
204 static ssize_t device_id_show(struct device *dev,
205                               struct device_attribute *dev_attr, char *buf)
206 {
207         struct hv_device *hv_dev = device_to_hv_device(dev);
208
209         if (!hv_dev->channel)
210                 return -ENODEV;
211         return sprintf(buf, "{%pUl}\n",
212                        hv_dev->channel->offermsg.offer.if_instance.b);
213 }
214 static DEVICE_ATTR_RO(device_id);
215
216 static ssize_t modalias_show(struct device *dev,
217                              struct device_attribute *dev_attr, char *buf)
218 {
219         struct hv_device *hv_dev = device_to_hv_device(dev);
220         char alias_name[VMBUS_ALIAS_LEN + 1];
221
222         print_alias_name(hv_dev, alias_name);
223         return sprintf(buf, "vmbus:%s\n", alias_name);
224 }
225 static DEVICE_ATTR_RO(modalias);
226
227 static ssize_t server_monitor_pending_show(struct device *dev,
228                                            struct device_attribute *dev_attr,
229                                            char *buf)
230 {
231         struct hv_device *hv_dev = device_to_hv_device(dev);
232
233         if (!hv_dev->channel)
234                 return -ENODEV;
235         return sprintf(buf, "%d\n",
236                        channel_pending(hv_dev->channel,
237                                        vmbus_connection.monitor_pages[1]));
238 }
239 static DEVICE_ATTR_RO(server_monitor_pending);
240
241 static ssize_t client_monitor_pending_show(struct device *dev,
242                                            struct device_attribute *dev_attr,
243                                            char *buf)
244 {
245         struct hv_device *hv_dev = device_to_hv_device(dev);
246
247         if (!hv_dev->channel)
248                 return -ENODEV;
249         return sprintf(buf, "%d\n",
250                        channel_pending(hv_dev->channel,
251                                        vmbus_connection.monitor_pages[1]));
252 }
253 static DEVICE_ATTR_RO(client_monitor_pending);
254
255 static ssize_t server_monitor_latency_show(struct device *dev,
256                                            struct device_attribute *dev_attr,
257                                            char *buf)
258 {
259         struct hv_device *hv_dev = device_to_hv_device(dev);
260
261         if (!hv_dev->channel)
262                 return -ENODEV;
263         return sprintf(buf, "%d\n",
264                        channel_latency(hv_dev->channel,
265                                        vmbus_connection.monitor_pages[0]));
266 }
267 static DEVICE_ATTR_RO(server_monitor_latency);
268
269 static ssize_t client_monitor_latency_show(struct device *dev,
270                                            struct device_attribute *dev_attr,
271                                            char *buf)
272 {
273         struct hv_device *hv_dev = device_to_hv_device(dev);
274
275         if (!hv_dev->channel)
276                 return -ENODEV;
277         return sprintf(buf, "%d\n",
278                        channel_latency(hv_dev->channel,
279                                        vmbus_connection.monitor_pages[1]));
280 }
281 static DEVICE_ATTR_RO(client_monitor_latency);
282
283 static ssize_t server_monitor_conn_id_show(struct device *dev,
284                                            struct device_attribute *dev_attr,
285                                            char *buf)
286 {
287         struct hv_device *hv_dev = device_to_hv_device(dev);
288
289         if (!hv_dev->channel)
290                 return -ENODEV;
291         return sprintf(buf, "%d\n",
292                        channel_conn_id(hv_dev->channel,
293                                        vmbus_connection.monitor_pages[0]));
294 }
295 static DEVICE_ATTR_RO(server_monitor_conn_id);
296
297 static ssize_t client_monitor_conn_id_show(struct device *dev,
298                                            struct device_attribute *dev_attr,
299                                            char *buf)
300 {
301         struct hv_device *hv_dev = device_to_hv_device(dev);
302
303         if (!hv_dev->channel)
304                 return -ENODEV;
305         return sprintf(buf, "%d\n",
306                        channel_conn_id(hv_dev->channel,
307                                        vmbus_connection.monitor_pages[1]));
308 }
309 static DEVICE_ATTR_RO(client_monitor_conn_id);
310
311 static ssize_t out_intr_mask_show(struct device *dev,
312                                   struct device_attribute *dev_attr, char *buf)
313 {
314         struct hv_device *hv_dev = device_to_hv_device(dev);
315         struct hv_ring_buffer_debug_info outbound;
316
317         if (!hv_dev->channel)
318                 return -ENODEV;
319         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
320                 return -EINVAL;
321         hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
322         return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
323 }
324 static DEVICE_ATTR_RO(out_intr_mask);
325
326 static ssize_t out_read_index_show(struct device *dev,
327                                    struct device_attribute *dev_attr, char *buf)
328 {
329         struct hv_device *hv_dev = device_to_hv_device(dev);
330         struct hv_ring_buffer_debug_info outbound;
331
332         if (!hv_dev->channel)
333                 return -ENODEV;
334         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
335                 return -EINVAL;
336         hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
337         return sprintf(buf, "%d\n", outbound.current_read_index);
338 }
339 static DEVICE_ATTR_RO(out_read_index);
340
341 static ssize_t out_write_index_show(struct device *dev,
342                                     struct device_attribute *dev_attr,
343                                     char *buf)
344 {
345         struct hv_device *hv_dev = device_to_hv_device(dev);
346         struct hv_ring_buffer_debug_info outbound;
347
348         if (!hv_dev->channel)
349                 return -ENODEV;
350         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
351                 return -EINVAL;
352         hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
353         return sprintf(buf, "%d\n", outbound.current_write_index);
354 }
355 static DEVICE_ATTR_RO(out_write_index);
356
357 static ssize_t out_read_bytes_avail_show(struct device *dev,
358                                          struct device_attribute *dev_attr,
359                                          char *buf)
360 {
361         struct hv_device *hv_dev = device_to_hv_device(dev);
362         struct hv_ring_buffer_debug_info outbound;
363
364         if (!hv_dev->channel)
365                 return -ENODEV;
366         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
367                 return -EINVAL;
368         hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
369         return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
370 }
371 static DEVICE_ATTR_RO(out_read_bytes_avail);
372
373 static ssize_t out_write_bytes_avail_show(struct device *dev,
374                                           struct device_attribute *dev_attr,
375                                           char *buf)
376 {
377         struct hv_device *hv_dev = device_to_hv_device(dev);
378         struct hv_ring_buffer_debug_info outbound;
379
380         if (!hv_dev->channel)
381                 return -ENODEV;
382         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
383                 return -EINVAL;
384         hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
385         return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
386 }
387 static DEVICE_ATTR_RO(out_write_bytes_avail);
388
389 static ssize_t in_intr_mask_show(struct device *dev,
390                                  struct device_attribute *dev_attr, char *buf)
391 {
392         struct hv_device *hv_dev = device_to_hv_device(dev);
393         struct hv_ring_buffer_debug_info inbound;
394
395         if (!hv_dev->channel)
396                 return -ENODEV;
397         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
398                 return -EINVAL;
399         hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
400         return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
401 }
402 static DEVICE_ATTR_RO(in_intr_mask);
403
404 static ssize_t in_read_index_show(struct device *dev,
405                                   struct device_attribute *dev_attr, char *buf)
406 {
407         struct hv_device *hv_dev = device_to_hv_device(dev);
408         struct hv_ring_buffer_debug_info inbound;
409
410         if (!hv_dev->channel)
411                 return -ENODEV;
412         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
413                 return -EINVAL;
414         hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
415         return sprintf(buf, "%d\n", inbound.current_read_index);
416 }
417 static DEVICE_ATTR_RO(in_read_index);
418
419 static ssize_t in_write_index_show(struct device *dev,
420                                    struct device_attribute *dev_attr, char *buf)
421 {
422         struct hv_device *hv_dev = device_to_hv_device(dev);
423         struct hv_ring_buffer_debug_info inbound;
424
425         if (!hv_dev->channel)
426                 return -ENODEV;
427         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
428                 return -EINVAL;
429         hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
430         return sprintf(buf, "%d\n", inbound.current_write_index);
431 }
432 static DEVICE_ATTR_RO(in_write_index);
433
434 static ssize_t in_read_bytes_avail_show(struct device *dev,
435                                         struct device_attribute *dev_attr,
436                                         char *buf)
437 {
438         struct hv_device *hv_dev = device_to_hv_device(dev);
439         struct hv_ring_buffer_debug_info inbound;
440
441         if (!hv_dev->channel)
442                 return -ENODEV;
443         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
444                 return -EINVAL;
445         hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
446         return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
447 }
448 static DEVICE_ATTR_RO(in_read_bytes_avail);
449
450 static ssize_t in_write_bytes_avail_show(struct device *dev,
451                                          struct device_attribute *dev_attr,
452                                          char *buf)
453 {
454         struct hv_device *hv_dev = device_to_hv_device(dev);
455         struct hv_ring_buffer_debug_info inbound;
456
457         if (!hv_dev->channel)
458                 return -ENODEV;
459         if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
460                 return -EINVAL;
461         hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
462         return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
463 }
464 static DEVICE_ATTR_RO(in_write_bytes_avail);
465
466 static ssize_t channel_vp_mapping_show(struct device *dev,
467                                        struct device_attribute *dev_attr,
468                                        char *buf)
469 {
470         struct hv_device *hv_dev = device_to_hv_device(dev);
471         struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
472         unsigned long flags;
473         int buf_size = PAGE_SIZE, n_written, tot_written;
474         struct list_head *cur;
475
476         if (!channel)
477                 return -ENODEV;
478
479         tot_written = snprintf(buf, buf_size, "%u:%u\n",
480                 channel->offermsg.child_relid, channel->target_cpu);
481
482         spin_lock_irqsave(&channel->lock, flags);
483
484         list_for_each(cur, &channel->sc_list) {
485                 if (tot_written >= buf_size - 1)
486                         break;
487
488                 cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
489                 n_written = scnprintf(buf + tot_written,
490                                      buf_size - tot_written,
491                                      "%u:%u\n",
492                                      cur_sc->offermsg.child_relid,
493                                      cur_sc->target_cpu);
494                 tot_written += n_written;
495         }
496
497         spin_unlock_irqrestore(&channel->lock, flags);
498
499         return tot_written;
500 }
501 static DEVICE_ATTR_RO(channel_vp_mapping);
502
503 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
504 static struct attribute *vmbus_attrs[] = {
505         &dev_attr_id.attr,
506         &dev_attr_state.attr,
507         &dev_attr_monitor_id.attr,
508         &dev_attr_class_id.attr,
509         &dev_attr_device_id.attr,
510         &dev_attr_modalias.attr,
511         &dev_attr_server_monitor_pending.attr,
512         &dev_attr_client_monitor_pending.attr,
513         &dev_attr_server_monitor_latency.attr,
514         &dev_attr_client_monitor_latency.attr,
515         &dev_attr_server_monitor_conn_id.attr,
516         &dev_attr_client_monitor_conn_id.attr,
517         &dev_attr_out_intr_mask.attr,
518         &dev_attr_out_read_index.attr,
519         &dev_attr_out_write_index.attr,
520         &dev_attr_out_read_bytes_avail.attr,
521         &dev_attr_out_write_bytes_avail.attr,
522         &dev_attr_in_intr_mask.attr,
523         &dev_attr_in_read_index.attr,
524         &dev_attr_in_write_index.attr,
525         &dev_attr_in_read_bytes_avail.attr,
526         &dev_attr_in_write_bytes_avail.attr,
527         &dev_attr_channel_vp_mapping.attr,
528         NULL,
529 };
530 ATTRIBUTE_GROUPS(vmbus);
531
532 /*
533  * vmbus_uevent - add uevent for our device
534  *
535  * This routine is invoked when a device is added or removed on the vmbus to
536  * generate a uevent to udev in the userspace. The udev will then look at its
537  * rule and the uevent generated here to load the appropriate driver
538  *
539  * The alias string will be of the form vmbus:guid where guid is the string
540  * representation of the device guid (each byte of the guid will be
541  * represented with two hex characters.
542  */
543 static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
544 {
545         struct hv_device *dev = device_to_hv_device(device);
546         int ret;
547         char alias_name[VMBUS_ALIAS_LEN + 1];
548
549         print_alias_name(dev, alias_name);
550         ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
551         return ret;
552 }
553
554 static const uuid_le null_guid;
555
556 static inline bool is_null_guid(const __u8 *guid)
557 {
558         if (memcmp(guid, &null_guid, sizeof(uuid_le)))
559                 return false;
560         return true;
561 }
562
563 /*
564  * Return a matching hv_vmbus_device_id pointer.
565  * If there is no match, return NULL.
566  */
567 static const struct hv_vmbus_device_id *hv_vmbus_get_id(
568                                         const struct hv_vmbus_device_id *id,
569                                         const __u8 *guid)
570 {
571         for (; !is_null_guid(id->guid); id++)
572                 if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
573                         return id;
574
575         return NULL;
576 }
577
578
579
580 /*
581  * vmbus_match - Attempt to match the specified device to the specified driver
582  */
583 static int vmbus_match(struct device *device, struct device_driver *driver)
584 {
585         struct hv_driver *drv = drv_to_hv_drv(driver);
586         struct hv_device *hv_dev = device_to_hv_device(device);
587
588         if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b))
589                 return 1;
590
591         return 0;
592 }
593
594 /*
595  * vmbus_probe - Add the new vmbus's child device
596  */
597 static int vmbus_probe(struct device *child_device)
598 {
599         int ret = 0;
600         struct hv_driver *drv =
601                         drv_to_hv_drv(child_device->driver);
602         struct hv_device *dev = device_to_hv_device(child_device);
603         const struct hv_vmbus_device_id *dev_id;
604
605         dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b);
606         if (drv->probe) {
607                 ret = drv->probe(dev, dev_id);
608                 if (ret != 0)
609                         pr_err("probe failed for device %s (%d)\n",
610                                dev_name(child_device), ret);
611
612         } else {
613                 pr_err("probe not set for driver %s\n",
614                        dev_name(child_device));
615                 ret = -ENODEV;
616         }
617         return ret;
618 }
619
620 /*
621  * vmbus_remove - Remove a vmbus device
622  */
623 static int vmbus_remove(struct device *child_device)
624 {
625         struct hv_driver *drv;
626         struct hv_device *dev = device_to_hv_device(child_device);
627
628         if (child_device->driver) {
629                 drv = drv_to_hv_drv(child_device->driver);
630                 if (drv->remove)
631                         drv->remove(dev);
632         }
633
634         return 0;
635 }
636
637
638 /*
639  * vmbus_shutdown - Shutdown a vmbus device
640  */
641 static void vmbus_shutdown(struct device *child_device)
642 {
643         struct hv_driver *drv;
644         struct hv_device *dev = device_to_hv_device(child_device);
645
646
647         /* The device may not be attached yet */
648         if (!child_device->driver)
649                 return;
650
651         drv = drv_to_hv_drv(child_device->driver);
652
653         if (drv->shutdown)
654                 drv->shutdown(dev);
655
656         return;
657 }
658
659
660 /*
661  * vmbus_device_release - Final callback release of the vmbus child device
662  */
663 static void vmbus_device_release(struct device *device)
664 {
665         struct hv_device *hv_dev = device_to_hv_device(device);
666         struct vmbus_channel *channel = hv_dev->channel;
667
668         hv_process_channel_removal(channel,
669                                    channel->offermsg.child_relid);
670         kfree(hv_dev);
671
672 }
673
674 /* The one and only one */
675 static struct bus_type  hv_bus = {
676         .name =         "vmbus",
677         .match =                vmbus_match,
678         .shutdown =             vmbus_shutdown,
679         .remove =               vmbus_remove,
680         .probe =                vmbus_probe,
681         .uevent =               vmbus_uevent,
682         .dev_groups =           vmbus_groups,
683 };
684
685 struct onmessage_work_context {
686         struct work_struct work;
687         struct hv_message msg;
688 };
689
690 static void vmbus_onmessage_work(struct work_struct *work)
691 {
692         struct onmessage_work_context *ctx;
693
694         /* Do not process messages if we're in DISCONNECTED state */
695         if (vmbus_connection.conn_state == DISCONNECTED)
696                 return;
697
698         ctx = container_of(work, struct onmessage_work_context,
699                            work);
700         vmbus_onmessage(&ctx->msg);
701         kfree(ctx);
702 }
703
704 static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
705 {
706         struct clock_event_device *dev = hv_context.clk_evt[cpu];
707
708         if (dev->event_handler)
709                 dev->event_handler(dev);
710
711         msg->header.message_type = HVMSG_NONE;
712
713         /*
714          * Make sure the write to MessageType (ie set to
715          * HVMSG_NONE) happens before we read the
716          * MessagePending and EOMing. Otherwise, the EOMing
717          * will not deliver any more messages since there is
718          * no empty slot
719          */
720         mb();
721
722         if (msg->header.message_flags.msg_pending) {
723                 /*
724                  * This will cause message queue rescan to
725                  * possibly deliver another msg from the
726                  * hypervisor
727                  */
728                 wrmsrl(HV_X64_MSR_EOM, 0);
729         }
730 }
731
732 static void vmbus_on_msg_dpc(unsigned long data)
733 {
734         int cpu = smp_processor_id();
735         void *page_addr = hv_context.synic_message_page[cpu];
736         struct hv_message *msg = (struct hv_message *)page_addr +
737                                   VMBUS_MESSAGE_SINT;
738         struct vmbus_channel_message_header *hdr;
739         struct vmbus_channel_message_table_entry *entry;
740         struct onmessage_work_context *ctx;
741
742         while (1) {
743                 if (msg->header.message_type == HVMSG_NONE)
744                         /* no msg */
745                         break;
746
747                 hdr = (struct vmbus_channel_message_header *)msg->u.payload;
748
749                 if (hdr->msgtype >= CHANNELMSG_COUNT) {
750                         WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
751                         goto msg_handled;
752                 }
753
754                 entry = &channel_message_table[hdr->msgtype];
755                 if (entry->handler_type == VMHT_BLOCKING) {
756                         ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
757                         if (ctx == NULL)
758                                 continue;
759
760                         INIT_WORK(&ctx->work, vmbus_onmessage_work);
761                         memcpy(&ctx->msg, msg, sizeof(*msg));
762
763                         queue_work(vmbus_connection.work_queue, &ctx->work);
764                 } else
765                         entry->message_handler(hdr);
766
767 msg_handled:
768                 msg->header.message_type = HVMSG_NONE;
769
770                 /*
771                  * Make sure the write to MessageType (ie set to
772                  * HVMSG_NONE) happens before we read the
773                  * MessagePending and EOMing. Otherwise, the EOMing
774                  * will not deliver any more messages since there is
775                  * no empty slot
776                  */
777                 mb();
778
779                 if (msg->header.message_flags.msg_pending) {
780                         /*
781                          * This will cause message queue rescan to
782                          * possibly deliver another msg from the
783                          * hypervisor
784                          */
785                         wrmsrl(HV_X64_MSR_EOM, 0);
786                 }
787         }
788 }
789
790 static void vmbus_isr(void)
791 {
792         int cpu = smp_processor_id();
793         void *page_addr;
794         struct hv_message *msg;
795         union hv_synic_event_flags *event;
796         bool handled = false;
797
798         page_addr = hv_context.synic_event_page[cpu];
799         if (page_addr == NULL)
800                 return;
801
802         event = (union hv_synic_event_flags *)page_addr +
803                                          VMBUS_MESSAGE_SINT;
804         /*
805          * Check for events before checking for messages. This is the order
806          * in which events and messages are checked in Windows guests on
807          * Hyper-V, and the Windows team suggested we do the same.
808          */
809
810         if ((vmbus_proto_version == VERSION_WS2008) ||
811                 (vmbus_proto_version == VERSION_WIN7)) {
812
813                 /* Since we are a child, we only need to check bit 0 */
814                 if (sync_test_and_clear_bit(0,
815                         (unsigned long *) &event->flags32[0])) {
816                         handled = true;
817                 }
818         } else {
819                 /*
820                  * Our host is win8 or above. The signaling mechanism
821                  * has changed and we can directly look at the event page.
822                  * If bit n is set then we have an interrup on the channel
823                  * whose id is n.
824                  */
825                 handled = true;
826         }
827
828         if (handled)
829                 tasklet_schedule(hv_context.event_dpc[cpu]);
830
831
832         page_addr = hv_context.synic_message_page[cpu];
833         msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
834
835         /* Check if there are actual msgs to be processed */
836         if (msg->header.message_type != HVMSG_NONE) {
837                 if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
838                         hv_process_timer_expiration(msg, cpu);
839                 else
840                         tasklet_schedule(&msg_dpc);
841         }
842
843         add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
844 }
845
846
847 /*
848  * vmbus_bus_init -Main vmbus driver initialization routine.
849  *
850  * Here, we
851  *      - initialize the vmbus driver context
852  *      - invoke the vmbus hv main init routine
853  *      - get the irq resource
854  *      - retrieve the channel offers
855  */
856 static int vmbus_bus_init(int irq)
857 {
858         int ret;
859
860         /* Hypervisor initialization...setup hypercall page..etc */
861         ret = hv_init();
862         if (ret != 0) {
863                 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
864                 return ret;
865         }
866
867         tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);
868
869         ret = bus_register(&hv_bus);
870         if (ret)
871                 goto err_cleanup;
872
873         hv_setup_vmbus_irq(vmbus_isr);
874
875         ret = hv_synic_alloc();
876         if (ret)
877                 goto err_alloc;
878         /*
879          * Initialize the per-cpu interrupt state and
880          * connect to the host.
881          */
882         on_each_cpu(hv_synic_init, NULL, 1);
883         ret = vmbus_connect();
884         if (ret)
885                 goto err_connect;
886
887         if (vmbus_proto_version > VERSION_WIN7)
888                 cpu_hotplug_disable();
889
890         /*
891          * Only register if the crash MSRs are available
892          */
893         if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
894                 register_die_notifier(&hyperv_die_block);
895                 atomic_notifier_chain_register(&panic_notifier_list,
896                                                &hyperv_panic_block);
897         }
898
899         vmbus_request_offers();
900
901         return 0;
902
903 err_connect:
904         on_each_cpu(hv_synic_cleanup, NULL, 1);
905 err_alloc:
906         hv_synic_free();
907         hv_remove_vmbus_irq();
908
909         bus_unregister(&hv_bus);
910
911 err_cleanup:
912         hv_cleanup(false);
913
914         return ret;
915 }
916
917 /**
918  * __vmbus_child_driver_register() - Register a vmbus's driver
919  * @hv_driver: Pointer to driver structure you want to register
920  * @owner: owner module of the drv
921  * @mod_name: module name string
922  *
923  * Registers the given driver with Linux through the 'driver_register()' call
924  * and sets up the hyper-v vmbus handling for this driver.
925  * It will return the state of the 'driver_register()' call.
926  *
927  */
928 int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
929 {
930         int ret;
931
932         pr_info("registering driver %s\n", hv_driver->name);
933
934         ret = vmbus_exists();
935         if (ret < 0)
936                 return ret;
937
938         hv_driver->driver.name = hv_driver->name;
939         hv_driver->driver.owner = owner;
940         hv_driver->driver.mod_name = mod_name;
941         hv_driver->driver.bus = &hv_bus;
942
943         ret = driver_register(&hv_driver->driver);
944
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__vmbus_driver_register);
948
949 /**
950  * vmbus_driver_unregister() - Unregister a vmbus's driver
951  * @hv_driver: Pointer to driver structure you want to
952  *             un-register
953  *
954  * Un-register the given driver that was previous registered with a call to
955  * vmbus_driver_register()
956  */
957 void vmbus_driver_unregister(struct hv_driver *hv_driver)
958 {
959         pr_info("unregistering driver %s\n", hv_driver->name);
960
961         if (!vmbus_exists())
962                 driver_unregister(&hv_driver->driver);
963 }
964 EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
965
966 /*
967  * vmbus_device_create - Creates and registers a new child device
968  * on the vmbus.
969  */
970 struct hv_device *vmbus_device_create(const uuid_le *type,
971                                       const uuid_le *instance,
972                                       struct vmbus_channel *channel)
973 {
974         struct hv_device *child_device_obj;
975
976         child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
977         if (!child_device_obj) {
978                 pr_err("Unable to allocate device object for child device\n");
979                 return NULL;
980         }
981
982         child_device_obj->channel = channel;
983         memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
984         memcpy(&child_device_obj->dev_instance, instance,
985                sizeof(uuid_le));
986
987
988         return child_device_obj;
989 }
990
991 /*
992  * vmbus_device_register - Register the child device
993  */
994 int vmbus_device_register(struct hv_device *child_device_obj)
995 {
996         int ret = 0;
997
998         dev_set_name(&child_device_obj->device, "vmbus_%d",
999                      child_device_obj->channel->id);
1000
1001         child_device_obj->device.bus = &hv_bus;
1002         child_device_obj->device.parent = &hv_acpi_dev->dev;
1003         child_device_obj->device.release = vmbus_device_release;
1004
1005         /*
1006          * Register with the LDM. This will kick off the driver/device
1007          * binding...which will eventually call vmbus_match() and vmbus_probe()
1008          */
1009         ret = device_register(&child_device_obj->device);
1010
1011         if (ret)
1012                 pr_err("Unable to register child device\n");
1013         else
1014                 pr_debug("child device %s registered\n",
1015                         dev_name(&child_device_obj->device));
1016
1017         return ret;
1018 }
1019
1020 /*
1021  * vmbus_device_unregister - Remove the specified child device
1022  * from the vmbus.
1023  */
1024 void vmbus_device_unregister(struct hv_device *device_obj)
1025 {
1026         pr_debug("child device %s unregistered\n",
1027                 dev_name(&device_obj->device));
1028
1029         /*
1030          * Kick off the process of unregistering the device.
1031          * This will call vmbus_remove() and eventually vmbus_device_release()
1032          */
1033         device_unregister(&device_obj->device);
1034 }
1035
1036
1037 /*
1038  * VMBUS is an acpi enumerated device. Get the information we
1039  * need from DSDT.
1040  */
1041 #define VTPM_BASE_ADDRESS 0xfed40000
1042 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
1043 {
1044         resource_size_t start = 0;
1045         resource_size_t end = 0;
1046         struct resource *new_res;
1047         struct resource **old_res = &hyperv_mmio;
1048         struct resource **prev_res = NULL;
1049
1050         switch (res->type) {
1051         case ACPI_RESOURCE_TYPE_IRQ:
1052                 irq = res->data.irq.interrupts[0];
1053                 return AE_OK;
1054
1055         /*
1056          * "Address" descriptors are for bus windows. Ignore
1057          * "memory" descriptors, which are for registers on
1058          * devices.
1059          */
1060         case ACPI_RESOURCE_TYPE_ADDRESS32:
1061                 start = res->data.address32.address.minimum;
1062                 end = res->data.address32.address.maximum;
1063                 break;
1064
1065         case ACPI_RESOURCE_TYPE_ADDRESS64:
1066                 start = res->data.address64.address.minimum;
1067                 end = res->data.address64.address.maximum;
1068                 break;
1069
1070         default:
1071                 /* Unused resource type */
1072                 return AE_OK;
1073
1074         }
1075         /*
1076          * Ignore ranges that are below 1MB, as they're not
1077          * necessary or useful here.
1078          */
1079         if (end < 0x100000)
1080                 return AE_OK;
1081
1082         new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
1083         if (!new_res)
1084                 return AE_NO_MEMORY;
1085
1086         /* If this range overlaps the virtual TPM, truncate it. */
1087         if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
1088                 end = VTPM_BASE_ADDRESS;
1089
1090         new_res->name = "hyperv mmio";
1091         new_res->flags = IORESOURCE_MEM;
1092         new_res->start = start;
1093         new_res->end = end;
1094
1095         do {
1096                 if (!*old_res) {
1097                         *old_res = new_res;
1098                         break;
1099                 }
1100
1101                 if ((*old_res)->end < new_res->start) {
1102                         new_res->sibling = *old_res;
1103                         if (prev_res)
1104                                 (*prev_res)->sibling = new_res;
1105                         *old_res = new_res;
1106                         break;
1107                 }
1108
1109                 prev_res = old_res;
1110                 old_res = &(*old_res)->sibling;
1111
1112         } while (1);
1113
1114         return AE_OK;
1115 }
1116
1117 static int vmbus_acpi_remove(struct acpi_device *device)
1118 {
1119         struct resource *cur_res;
1120         struct resource *next_res;
1121
1122         if (hyperv_mmio) {
1123                 for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
1124                         next_res = cur_res->sibling;
1125                         kfree(cur_res);
1126                 }
1127         }
1128
1129         return 0;
1130 }
1131
1132 /**
1133  * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
1134  * @new:                If successful, supplied a pointer to the
1135  *                      allocated MMIO space.
1136  * @device_obj:         Identifies the caller
1137  * @min:                Minimum guest physical address of the
1138  *                      allocation
1139  * @max:                Maximum guest physical address
1140  * @size:               Size of the range to be allocated
1141  * @align:              Alignment of the range to be allocated
1142  * @fb_overlap_ok:      Whether this allocation can be allowed
1143  *                      to overlap the video frame buffer.
1144  *
1145  * This function walks the resources granted to VMBus by the
1146  * _CRS object in the ACPI namespace underneath the parent
1147  * "bridge" whether that's a root PCI bus in the Generation 1
1148  * case or a Module Device in the Generation 2 case.  It then
1149  * attempts to allocate from the global MMIO pool in a way that
1150  * matches the constraints supplied in these parameters and by
1151  * that _CRS.
1152  *
1153  * Return: 0 on success, -errno on failure
1154  */
1155 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
1156                         resource_size_t min, resource_size_t max,
1157                         resource_size_t size, resource_size_t align,
1158                         bool fb_overlap_ok)
1159 {
1160         struct resource *iter;
1161         resource_size_t range_min, range_max, start, local_min, local_max;
1162         const char *dev_n = dev_name(&device_obj->device);
1163         u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1);
1164         int i, retval;
1165
1166         retval = -ENXIO;
1167         down(&hyperv_mmio_lock);
1168
1169         for (iter = hyperv_mmio; iter; iter = iter->sibling) {
1170                 if ((iter->start >= max) || (iter->end <= min))
1171                         continue;
1172
1173                 range_min = iter->start;
1174                 range_max = iter->end;
1175
1176                 /* If this range overlaps the frame buffer, split it into
1177                    two tries. */
1178                 for (i = 0; i < 2; i++) {
1179                         local_min = range_min;
1180                         local_max = range_max;
1181                         if (fb_overlap_ok || (range_min >= fb_end) ||
1182                             (range_max <= screen_info.lfb_base)) {
1183                                 i++;
1184                         } else {
1185                                 if ((range_min <= screen_info.lfb_base) &&
1186                                     (range_max >= screen_info.lfb_base)) {
1187                                         /*
1188                                          * The frame buffer is in this window,
1189                                          * so trim this into the part that
1190                                          * preceeds the frame buffer.
1191                                          */
1192                                         local_max = screen_info.lfb_base - 1;
1193                                         range_min = fb_end;
1194                                 } else {
1195                                         range_min = fb_end;
1196                                         continue;
1197                                 }
1198                         }
1199
1200                         start = (local_min + align - 1) & ~(align - 1);
1201                         for (; start + size - 1 <= local_max; start += align) {
1202                                 *new = request_mem_region_exclusive(start, size,
1203                                                                     dev_n);
1204                                 if (*new) {
1205                                         retval = 0;
1206                                         goto exit;
1207                                 }
1208                         }
1209                 }
1210         }
1211
1212 exit:
1213         up(&hyperv_mmio_lock);
1214         return retval;
1215 }
1216 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
1217
1218 static int vmbus_acpi_add(struct acpi_device *device)
1219 {
1220         acpi_status result;
1221         int ret_val = -ENODEV;
1222         struct acpi_device *ancestor;
1223
1224         hv_acpi_dev = device;
1225
1226         result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
1227                                         vmbus_walk_resources, NULL);
1228
1229         if (ACPI_FAILURE(result))
1230                 goto acpi_walk_err;
1231         /*
1232          * Some ancestor of the vmbus acpi device (Gen1 or Gen2
1233          * firmware) is the VMOD that has the mmio ranges. Get that.
1234          */
1235         for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
1236                 result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
1237                                              vmbus_walk_resources, NULL);
1238
1239                 if (ACPI_FAILURE(result))
1240                         continue;
1241                 if (hyperv_mmio)
1242                         break;
1243         }
1244         ret_val = 0;
1245
1246 acpi_walk_err:
1247         complete(&probe_event);
1248         if (ret_val)
1249                 vmbus_acpi_remove(device);
1250         return ret_val;
1251 }
1252
1253 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
1254         {"VMBUS", 0},
1255         {"VMBus", 0},
1256         {"", 0},
1257 };
1258 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
1259
1260 static struct acpi_driver vmbus_acpi_driver = {
1261         .name = "vmbus",
1262         .ids = vmbus_acpi_device_ids,
1263         .ops = {
1264                 .add = vmbus_acpi_add,
1265                 .remove = vmbus_acpi_remove,
1266         },
1267 };
1268
1269 static void hv_kexec_handler(void)
1270 {
1271         int cpu;
1272
1273         hv_synic_clockevents_cleanup();
1274         vmbus_initiate_unload();
1275         for_each_online_cpu(cpu)
1276                 smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
1277         hv_cleanup(false);
1278 };
1279
1280 static void hv_crash_handler(struct pt_regs *regs)
1281 {
1282         vmbus_initiate_unload();
1283         /*
1284          * In crash handler we can't schedule synic cleanup for all CPUs,
1285          * doing the cleanup for current CPU only. This should be sufficient
1286          * for kdump.
1287          */
1288         hv_synic_cleanup(NULL);
1289         hv_cleanup(true);
1290 };
1291
1292 static int __init hv_acpi_init(void)
1293 {
1294         int ret, t;
1295
1296         if (x86_hyper != &x86_hyper_ms_hyperv)
1297                 return -ENODEV;
1298
1299         init_completion(&probe_event);
1300
1301         /*
1302          * Get irq resources first.
1303          */
1304         ret = acpi_bus_register_driver(&vmbus_acpi_driver);
1305
1306         if (ret)
1307                 return ret;
1308
1309         t = wait_for_completion_timeout(&probe_event, 5*HZ);
1310         if (t == 0) {
1311                 ret = -ETIMEDOUT;
1312                 goto cleanup;
1313         }
1314
1315         if (irq <= 0) {
1316                 ret = -ENODEV;
1317                 goto cleanup;
1318         }
1319
1320         ret = vmbus_bus_init(irq);
1321         if (ret)
1322                 goto cleanup;
1323
1324         hv_setup_kexec_handler(hv_kexec_handler);
1325         hv_setup_crash_handler(hv_crash_handler);
1326
1327         return 0;
1328
1329 cleanup:
1330         acpi_bus_unregister_driver(&vmbus_acpi_driver);
1331         hv_acpi_dev = NULL;
1332         return ret;
1333 }
1334
1335 static void __exit vmbus_exit(void)
1336 {
1337         int cpu;
1338
1339         hv_remove_kexec_handler();
1340         hv_remove_crash_handler();
1341         vmbus_connection.conn_state = DISCONNECTED;
1342         hv_synic_clockevents_cleanup();
1343         vmbus_disconnect();
1344         hv_remove_vmbus_irq();
1345         tasklet_kill(&msg_dpc);
1346         vmbus_free_channels();
1347         if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
1348                 unregister_die_notifier(&hyperv_die_block);
1349                 atomic_notifier_chain_unregister(&panic_notifier_list,
1350                                                  &hyperv_panic_block);
1351         }
1352         bus_unregister(&hv_bus);
1353         hv_cleanup(false);
1354         for_each_online_cpu(cpu) {
1355                 tasklet_kill(hv_context.event_dpc[cpu]);
1356                 smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
1357         }
1358         hv_synic_free();
1359         acpi_bus_unregister_driver(&vmbus_acpi_driver);
1360         if (vmbus_proto_version > VERSION_WIN7)
1361                 cpu_hotplug_enable();
1362 }
1363
1364
1365 MODULE_LICENSE("GPL");
1366
1367 subsys_initcall(hv_acpi_init);
1368 module_exit(vmbus_exit);