GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / vhost / vsock.c
1 /*
2  * vhost transport for vsock
3  *
4  * Copyright (C) 2013-2015 Red Hat, Inc.
5  * Author: Asias He <asias@redhat.com>
6  *         Stefan Hajnoczi <stefanha@redhat.com>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2.
9  */
10 #include <linux/miscdevice.h>
11 #include <linux/atomic.h>
12 #include <linux/module.h>
13 #include <linux/mutex.h>
14 #include <linux/vmalloc.h>
15 #include <net/sock.h>
16 #include <linux/virtio_vsock.h>
17 #include <linux/vhost.h>
18 #include <linux/hashtable.h>
19
20 #include <net/af_vsock.h>
21 #include "vhost.h"
22
23 #define VHOST_VSOCK_DEFAULT_HOST_CID    2
24 /* Max number of bytes transferred before requeueing the job.
25  * Using this limit prevents one virtqueue from starving others. */
26 #define VHOST_VSOCK_WEIGHT 0x80000
27 /* Max number of packets transferred before requeueing the job.
28  * Using this limit prevents one virtqueue from starving others with
29  * small pkts.
30  */
31 #define VHOST_VSOCK_PKT_WEIGHT 256
32
33 enum {
34         VHOST_VSOCK_FEATURES = VHOST_FEATURES,
35 };
36
37 /* Used to track all the vhost_vsock instances on the system. */
38 static DEFINE_SPINLOCK(vhost_vsock_lock);
39 static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
40
41 struct vhost_vsock {
42         struct vhost_dev dev;
43         struct vhost_virtqueue vqs[2];
44
45         /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
46         struct hlist_node hash;
47
48         struct vhost_work send_pkt_work;
49         spinlock_t send_pkt_list_lock;
50         struct list_head send_pkt_list; /* host->guest pending packets */
51
52         atomic_t queued_replies;
53
54         u32 guest_cid;
55 };
56
57 static u32 vhost_transport_get_local_cid(void)
58 {
59         return VHOST_VSOCK_DEFAULT_HOST_CID;
60 }
61
62 /* Callers that dereference the return value must hold vhost_vsock_lock or the
63  * RCU read lock.
64  */
65 static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
66 {
67         struct vhost_vsock *vsock;
68
69         hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
70                 u32 other_cid = vsock->guest_cid;
71
72                 /* Skip instances that have no CID yet */
73                 if (other_cid == 0)
74                         continue;
75
76                 if (other_cid == guest_cid)
77                         return vsock;
78
79         }
80
81         return NULL;
82 }
83
84 static void
85 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
86                             struct vhost_virtqueue *vq)
87 {
88         struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
89         int pkts = 0, total_len = 0;
90         bool added = false;
91         bool restart_tx = false;
92
93         mutex_lock(&vq->mutex);
94
95         if (!vq->private_data)
96                 goto out;
97
98         /* Avoid further vmexits, we're already processing the virtqueue */
99         vhost_disable_notify(&vsock->dev, vq);
100
101         do {
102                 struct virtio_vsock_pkt *pkt;
103                 struct iov_iter iov_iter;
104                 unsigned out, in;
105                 size_t nbytes;
106                 size_t iov_len, payload_len;
107                 int head;
108
109                 spin_lock_bh(&vsock->send_pkt_list_lock);
110                 if (list_empty(&vsock->send_pkt_list)) {
111                         spin_unlock_bh(&vsock->send_pkt_list_lock);
112                         vhost_enable_notify(&vsock->dev, vq);
113                         break;
114                 }
115
116                 pkt = list_first_entry(&vsock->send_pkt_list,
117                                        struct virtio_vsock_pkt, list);
118                 list_del_init(&pkt->list);
119                 spin_unlock_bh(&vsock->send_pkt_list_lock);
120
121                 head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
122                                          &out, &in, NULL, NULL);
123                 if (head < 0) {
124                         spin_lock_bh(&vsock->send_pkt_list_lock);
125                         list_add(&pkt->list, &vsock->send_pkt_list);
126                         spin_unlock_bh(&vsock->send_pkt_list_lock);
127                         break;
128                 }
129
130                 if (head == vq->num) {
131                         spin_lock_bh(&vsock->send_pkt_list_lock);
132                         list_add(&pkt->list, &vsock->send_pkt_list);
133                         spin_unlock_bh(&vsock->send_pkt_list_lock);
134
135                         /* We cannot finish yet if more buffers snuck in while
136                          * re-enabling notify.
137                          */
138                         if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
139                                 vhost_disable_notify(&vsock->dev, vq);
140                                 continue;
141                         }
142                         break;
143                 }
144
145                 if (out) {
146                         virtio_transport_free_pkt(pkt);
147                         vq_err(vq, "Expected 0 output buffers, got %u\n", out);
148                         break;
149                 }
150
151                 iov_len = iov_length(&vq->iov[out], in);
152                 if (iov_len < sizeof(pkt->hdr)) {
153                         virtio_transport_free_pkt(pkt);
154                         vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
155                         break;
156                 }
157
158                 iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
159                 payload_len = pkt->len - pkt->off;
160
161                 /* If the packet is greater than the space available in the
162                  * buffer, we split it using multiple buffers.
163                  */
164                 if (payload_len > iov_len - sizeof(pkt->hdr))
165                         payload_len = iov_len - sizeof(pkt->hdr);
166
167                 /* Set the correct length in the header */
168                 pkt->hdr.len = cpu_to_le32(payload_len);
169
170                 nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
171                 if (nbytes != sizeof(pkt->hdr)) {
172                         virtio_transport_free_pkt(pkt);
173                         vq_err(vq, "Faulted on copying pkt hdr\n");
174                         break;
175                 }
176
177                 nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
178                                       &iov_iter);
179                 if (nbytes != payload_len) {
180                         virtio_transport_free_pkt(pkt);
181                         vq_err(vq, "Faulted on copying pkt buf\n");
182                         break;
183                 }
184
185                 /* Deliver to monitoring devices all packets that we
186                  * will transmit.
187                  */
188                 virtio_transport_deliver_tap_pkt(pkt);
189
190                 vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
191                 added = true;
192
193                 pkt->off += payload_len;
194                 total_len += payload_len;
195
196                 /* If we didn't send all the payload we can requeue the packet
197                  * to send it with the next available buffer.
198                  */
199                 if (pkt->off < pkt->len) {
200                         spin_lock_bh(&vsock->send_pkt_list_lock);
201                         list_add(&pkt->list, &vsock->send_pkt_list);
202                         spin_unlock_bh(&vsock->send_pkt_list_lock);
203                 } else {
204                         if (pkt->reply) {
205                                 int val;
206
207                                 val = atomic_dec_return(&vsock->queued_replies);
208
209                                 /* Do we have resources to resume tx
210                                  * processing?
211                                  */
212                                 if (val + 1 == tx_vq->num)
213                                         restart_tx = true;
214                         }
215
216                         virtio_transport_free_pkt(pkt);
217                 }
218         } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
219         if (added)
220                 vhost_signal(&vsock->dev, vq);
221
222 out:
223         mutex_unlock(&vq->mutex);
224
225         if (restart_tx)
226                 vhost_poll_queue(&tx_vq->poll);
227 }
228
229 static void vhost_transport_send_pkt_work(struct vhost_work *work)
230 {
231         struct vhost_virtqueue *vq;
232         struct vhost_vsock *vsock;
233
234         vsock = container_of(work, struct vhost_vsock, send_pkt_work);
235         vq = &vsock->vqs[VSOCK_VQ_RX];
236
237         vhost_transport_do_send_pkt(vsock, vq);
238 }
239
240 static int
241 vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
242 {
243         struct vhost_vsock *vsock;
244         int len = pkt->len;
245
246         rcu_read_lock();
247
248         /* Find the vhost_vsock according to guest context id  */
249         vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
250         if (!vsock) {
251                 rcu_read_unlock();
252                 virtio_transport_free_pkt(pkt);
253                 return -ENODEV;
254         }
255
256         if (pkt->reply)
257                 atomic_inc(&vsock->queued_replies);
258
259         spin_lock_bh(&vsock->send_pkt_list_lock);
260         list_add_tail(&pkt->list, &vsock->send_pkt_list);
261         spin_unlock_bh(&vsock->send_pkt_list_lock);
262
263         vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
264
265         rcu_read_unlock();
266         return len;
267 }
268
269 static int
270 vhost_transport_cancel_pkt(struct vsock_sock *vsk)
271 {
272         struct vhost_vsock *vsock;
273         struct virtio_vsock_pkt *pkt, *n;
274         int cnt = 0;
275         int ret = -ENODEV;
276         LIST_HEAD(freeme);
277
278         rcu_read_lock();
279
280         /* Find the vhost_vsock according to guest context id  */
281         vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
282         if (!vsock)
283                 goto out;
284
285         spin_lock_bh(&vsock->send_pkt_list_lock);
286         list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
287                 if (pkt->vsk != vsk)
288                         continue;
289                 list_move(&pkt->list, &freeme);
290         }
291         spin_unlock_bh(&vsock->send_pkt_list_lock);
292
293         list_for_each_entry_safe(pkt, n, &freeme, list) {
294                 if (pkt->reply)
295                         cnt++;
296                 list_del(&pkt->list);
297                 virtio_transport_free_pkt(pkt);
298         }
299
300         if (cnt) {
301                 struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
302                 int new_cnt;
303
304                 new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
305                 if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
306                         vhost_poll_queue(&tx_vq->poll);
307         }
308
309         ret = 0;
310 out:
311         rcu_read_unlock();
312         return ret;
313 }
314
315 static struct virtio_vsock_pkt *
316 vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
317                       unsigned int out, unsigned int in)
318 {
319         struct virtio_vsock_pkt *pkt;
320         struct iov_iter iov_iter;
321         size_t nbytes;
322         size_t len;
323
324         if (in != 0) {
325                 vq_err(vq, "Expected 0 input buffers, got %u\n", in);
326                 return NULL;
327         }
328
329         pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
330         if (!pkt)
331                 return NULL;
332
333         len = iov_length(vq->iov, out);
334         iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
335
336         nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
337         if (nbytes != sizeof(pkt->hdr)) {
338                 vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
339                        sizeof(pkt->hdr), nbytes);
340                 kfree(pkt);
341                 return NULL;
342         }
343
344         if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
345                 pkt->len = le32_to_cpu(pkt->hdr.len);
346
347         /* No payload */
348         if (!pkt->len)
349                 return pkt;
350
351         /* The pkt is too big */
352         if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
353                 kfree(pkt);
354                 return NULL;
355         }
356
357         pkt->buf = kvmalloc(pkt->len, GFP_KERNEL);
358         if (!pkt->buf) {
359                 kfree(pkt);
360                 return NULL;
361         }
362
363         nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
364         if (nbytes != pkt->len) {
365                 vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
366                        pkt->len, nbytes);
367                 virtio_transport_free_pkt(pkt);
368                 return NULL;
369         }
370
371         return pkt;
372 }
373
374 /* Is there space left for replies to rx packets? */
375 static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
376 {
377         struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
378         int val;
379
380         smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
381         val = atomic_read(&vsock->queued_replies);
382
383         return val < vq->num;
384 }
385
386 static struct virtio_transport vhost_transport = {
387         .transport = {
388                 .get_local_cid            = vhost_transport_get_local_cid,
389
390                 .init                     = virtio_transport_do_socket_init,
391                 .destruct                 = virtio_transport_destruct,
392                 .release                  = virtio_transport_release,
393                 .connect                  = virtio_transport_connect,
394                 .shutdown                 = virtio_transport_shutdown,
395                 .cancel_pkt               = vhost_transport_cancel_pkt,
396
397                 .dgram_enqueue            = virtio_transport_dgram_enqueue,
398                 .dgram_dequeue            = virtio_transport_dgram_dequeue,
399                 .dgram_bind               = virtio_transport_dgram_bind,
400                 .dgram_allow              = virtio_transport_dgram_allow,
401
402                 .stream_enqueue           = virtio_transport_stream_enqueue,
403                 .stream_dequeue           = virtio_transport_stream_dequeue,
404                 .stream_has_data          = virtio_transport_stream_has_data,
405                 .stream_has_space         = virtio_transport_stream_has_space,
406                 .stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
407                 .stream_is_active         = virtio_transport_stream_is_active,
408                 .stream_allow             = virtio_transport_stream_allow,
409
410                 .notify_poll_in           = virtio_transport_notify_poll_in,
411                 .notify_poll_out          = virtio_transport_notify_poll_out,
412                 .notify_recv_init         = virtio_transport_notify_recv_init,
413                 .notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
414                 .notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
415                 .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
416                 .notify_send_init         = virtio_transport_notify_send_init,
417                 .notify_send_pre_block    = virtio_transport_notify_send_pre_block,
418                 .notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
419                 .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
420
421                 .set_buffer_size          = virtio_transport_set_buffer_size,
422                 .set_min_buffer_size      = virtio_transport_set_min_buffer_size,
423                 .set_max_buffer_size      = virtio_transport_set_max_buffer_size,
424                 .get_buffer_size          = virtio_transport_get_buffer_size,
425                 .get_min_buffer_size      = virtio_transport_get_min_buffer_size,
426                 .get_max_buffer_size      = virtio_transport_get_max_buffer_size,
427         },
428
429         .send_pkt = vhost_transport_send_pkt,
430 };
431
432 static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
433 {
434         struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
435                                                   poll.work);
436         struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
437                                                  dev);
438         struct virtio_vsock_pkt *pkt;
439         int head, pkts = 0, total_len = 0;
440         unsigned int out, in;
441         bool added = false;
442
443         mutex_lock(&vq->mutex);
444
445         if (!vq->private_data)
446                 goto out;
447
448         vhost_disable_notify(&vsock->dev, vq);
449         do {
450                 u32 len;
451
452                 if (!vhost_vsock_more_replies(vsock)) {
453                         /* Stop tx until the device processes already
454                          * pending replies.  Leave tx virtqueue
455                          * callbacks disabled.
456                          */
457                         goto no_more_replies;
458                 }
459
460                 head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
461                                          &out, &in, NULL, NULL);
462                 if (head < 0)
463                         break;
464
465                 if (head == vq->num) {
466                         if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
467                                 vhost_disable_notify(&vsock->dev, vq);
468                                 continue;
469                         }
470                         break;
471                 }
472
473                 pkt = vhost_vsock_alloc_pkt(vq, out, in);
474                 if (!pkt) {
475                         vq_err(vq, "Faulted on pkt\n");
476                         continue;
477                 }
478
479                 len = pkt->len;
480
481                 /* Deliver to monitoring devices all received packets */
482                 virtio_transport_deliver_tap_pkt(pkt);
483
484                 /* Only accept correctly addressed packets */
485                 if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
486                     le64_to_cpu(pkt->hdr.dst_cid) ==
487                     vhost_transport_get_local_cid())
488                         virtio_transport_recv_pkt(&vhost_transport, pkt);
489                 else
490                         virtio_transport_free_pkt(pkt);
491
492                 len += sizeof(pkt->hdr);
493                 vhost_add_used(vq, head, 0);
494                 total_len += len;
495                 added = true;
496         } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
497
498 no_more_replies:
499         if (added)
500                 vhost_signal(&vsock->dev, vq);
501
502 out:
503         mutex_unlock(&vq->mutex);
504 }
505
506 static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
507 {
508         struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
509                                                 poll.work);
510         struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
511                                                  dev);
512
513         vhost_transport_do_send_pkt(vsock, vq);
514 }
515
516 static int vhost_vsock_start(struct vhost_vsock *vsock)
517 {
518         struct vhost_virtqueue *vq;
519         size_t i;
520         int ret;
521
522         mutex_lock(&vsock->dev.mutex);
523
524         ret = vhost_dev_check_owner(&vsock->dev);
525         if (ret)
526                 goto err;
527
528         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
529                 vq = &vsock->vqs[i];
530
531                 mutex_lock(&vq->mutex);
532
533                 if (!vhost_vq_access_ok(vq)) {
534                         ret = -EFAULT;
535                         goto err_vq;
536                 }
537
538                 if (!vq->private_data) {
539                         vq->private_data = vsock;
540                         ret = vhost_vq_init_access(vq);
541                         if (ret)
542                                 goto err_vq;
543                 }
544
545                 mutex_unlock(&vq->mutex);
546         }
547
548         /* Some packets may have been queued before the device was started,
549          * let's kick the send worker to send them.
550          */
551         vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
552
553         mutex_unlock(&vsock->dev.mutex);
554         return 0;
555
556 err_vq:
557         vq->private_data = NULL;
558         mutex_unlock(&vq->mutex);
559
560         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
561                 vq = &vsock->vqs[i];
562
563                 mutex_lock(&vq->mutex);
564                 vq->private_data = NULL;
565                 mutex_unlock(&vq->mutex);
566         }
567 err:
568         mutex_unlock(&vsock->dev.mutex);
569         return ret;
570 }
571
572 static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
573 {
574         size_t i;
575         int ret = 0;
576
577         mutex_lock(&vsock->dev.mutex);
578
579         if (check_owner) {
580                 ret = vhost_dev_check_owner(&vsock->dev);
581                 if (ret)
582                         goto err;
583         }
584
585         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
586                 struct vhost_virtqueue *vq = &vsock->vqs[i];
587
588                 mutex_lock(&vq->mutex);
589                 vq->private_data = NULL;
590                 mutex_unlock(&vq->mutex);
591         }
592
593 err:
594         mutex_unlock(&vsock->dev.mutex);
595         return ret;
596 }
597
/* Release the memory of @vsock, which vhost_vsock_dev_open() obtained with
 * kvmalloc().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	void *mem = vsock;

	kvfree(mem);
}
602
603 static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
604 {
605         struct vhost_virtqueue **vqs;
606         struct vhost_vsock *vsock;
607         int ret;
608
609         /* This struct is large and allocation could fail, fall back to vmalloc
610          * if there is no other way.
611          */
612         vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
613         if (!vsock)
614                 return -ENOMEM;
615
616         vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
617         if (!vqs) {
618                 ret = -ENOMEM;
619                 goto out;
620         }
621
622         vsock->guest_cid = 0; /* no CID assigned yet */
623
624         atomic_set(&vsock->queued_replies, 0);
625
626         vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
627         vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
628         vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
629         vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
630
631         vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
632                        UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
633                        VHOST_VSOCK_WEIGHT);
634
635         file->private_data = vsock;
636         spin_lock_init(&vsock->send_pkt_list_lock);
637         INIT_LIST_HEAD(&vsock->send_pkt_list);
638         vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
639         return 0;
640
641 out:
642         vhost_vsock_free(vsock);
643         return ret;
644 }
645
646 static void vhost_vsock_flush(struct vhost_vsock *vsock)
647 {
648         int i;
649
650         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
651                 if (vsock->vqs[i].handle_kick)
652                         vhost_poll_flush(&vsock->vqs[i].poll);
653         vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
654 }
655
656 static void vhost_vsock_reset_orphans(struct sock *sk)
657 {
658         struct vsock_sock *vsk = vsock_sk(sk);
659
660         /* vmci_transport.c doesn't take sk_lock here either.  At least we're
661          * under vsock_table_lock so the sock cannot disappear while we're
662          * executing.
663          */
664
665         /* If the peer is still valid, no need to reset connection */
666         if (vhost_vsock_get(vsk->remote_addr.svm_cid))
667                 return;
668
669         /* If the close timeout is pending, let it expire.  This avoids races
670          * with the timeout callback.
671          */
672         if (vsk->close_work_scheduled)
673                 return;
674
675         sock_set_flag(sk, SOCK_DONE);
676         vsk->peer_shutdown = SHUTDOWN_MASK;
677         sk->sk_state = SS_UNCONNECTED;
678         sk->sk_err = ECONNRESET;
679         sk->sk_error_report(sk);
680 }
681
682 static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
683 {
684         struct vhost_vsock *vsock = file->private_data;
685
686         spin_lock_bh(&vhost_vsock_lock);
687         if (vsock->guest_cid)
688                 hash_del_rcu(&vsock->hash);
689         spin_unlock_bh(&vhost_vsock_lock);
690
691         /* Wait for other CPUs to finish using vsock */
692         synchronize_rcu();
693
694         /* Iterating over all connections for all CIDs to find orphans is
695          * inefficient.  Room for improvement here. */
696         vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
697
698         /* Don't check the owner, because we are in the release path, so we
699          * need to stop the vsock device in any case.
700          * vhost_vsock_stop() can not fail in this case, so we don't need to
701          * check the return code.
702          */
703         vhost_vsock_stop(vsock, false);
704         vhost_vsock_flush(vsock);
705         vhost_dev_stop(&vsock->dev);
706
707         spin_lock_bh(&vsock->send_pkt_list_lock);
708         while (!list_empty(&vsock->send_pkt_list)) {
709                 struct virtio_vsock_pkt *pkt;
710
711                 pkt = list_first_entry(&vsock->send_pkt_list,
712                                 struct virtio_vsock_pkt, list);
713                 list_del_init(&pkt->list);
714                 virtio_transport_free_pkt(pkt);
715         }
716         spin_unlock_bh(&vsock->send_pkt_list_lock);
717
718         vhost_dev_cleanup(&vsock->dev);
719         kfree(vsock->dev.vqs);
720         vhost_vsock_free(vsock);
721         return 0;
722 }
723
724 static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
725 {
726         struct vhost_vsock *other;
727
728         /* Refuse reserved CIDs */
729         if (guest_cid <= VMADDR_CID_HOST ||
730             guest_cid == U32_MAX)
731                 return -EINVAL;
732
733         /* 64-bit CIDs are not yet supported */
734         if (guest_cid > U32_MAX)
735                 return -EINVAL;
736
737         /* Refuse if CID is already in use */
738         spin_lock_bh(&vhost_vsock_lock);
739         other = vhost_vsock_get(guest_cid);
740         if (other && other != vsock) {
741                 spin_unlock_bh(&vhost_vsock_lock);
742                 return -EADDRINUSE;
743         }
744
745         if (vsock->guest_cid)
746                 hash_del_rcu(&vsock->hash);
747
748         vsock->guest_cid = guest_cid;
749         hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
750         spin_unlock_bh(&vhost_vsock_lock);
751
752         return 0;
753 }
754
755 static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
756 {
757         struct vhost_virtqueue *vq;
758         int i;
759
760         if (features & ~VHOST_VSOCK_FEATURES)
761                 return -EOPNOTSUPP;
762
763         mutex_lock(&vsock->dev.mutex);
764         if ((features & (1 << VHOST_F_LOG_ALL)) &&
765             !vhost_log_access_ok(&vsock->dev)) {
766                 mutex_unlock(&vsock->dev.mutex);
767                 return -EFAULT;
768         }
769
770         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
771                 vq = &vsock->vqs[i];
772                 mutex_lock(&vq->mutex);
773                 vq->acked_features = features;
774                 mutex_unlock(&vq->mutex);
775         }
776         mutex_unlock(&vsock->dev.mutex);
777         return 0;
778 }
779
780 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
781                                   unsigned long arg)
782 {
783         struct vhost_vsock *vsock = f->private_data;
784         void __user *argp = (void __user *)arg;
785         u64 guest_cid;
786         u64 features;
787         int start;
788         int r;
789
790         switch (ioctl) {
791         case VHOST_VSOCK_SET_GUEST_CID:
792                 if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
793                         return -EFAULT;
794                 return vhost_vsock_set_cid(vsock, guest_cid);
795         case VHOST_VSOCK_SET_RUNNING:
796                 if (copy_from_user(&start, argp, sizeof(start)))
797                         return -EFAULT;
798                 if (start)
799                         return vhost_vsock_start(vsock);
800                 else
801                         return vhost_vsock_stop(vsock, true);
802         case VHOST_GET_FEATURES:
803                 features = VHOST_VSOCK_FEATURES;
804                 if (copy_to_user(argp, &features, sizeof(features)))
805                         return -EFAULT;
806                 return 0;
807         case VHOST_SET_FEATURES:
808                 if (copy_from_user(&features, argp, sizeof(features)))
809                         return -EFAULT;
810                 return vhost_vsock_set_features(vsock, features);
811         default:
812                 mutex_lock(&vsock->dev.mutex);
813                 r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
814                 if (r == -ENOIOCTLCMD)
815                         r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
816                 else
817                         vhost_vsock_flush(vsock);
818                 mutex_unlock(&vsock->dev.mutex);
819                 return r;
820         }
821 }
822
#ifdef CONFIG_COMPAT
/* compat ioctl() handler: convert the argument with compat_ptr() and reuse
 * the native handler.
 */
static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
					 unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vhost_vsock_dev_ioctl(f, ioctl, native_arg);
}
#endif
830
831 static const struct file_operations vhost_vsock_fops = {
832         .owner          = THIS_MODULE,
833         .open           = vhost_vsock_dev_open,
834         .release        = vhost_vsock_dev_release,
835         .llseek         = noop_llseek,
836         .unlocked_ioctl = vhost_vsock_dev_ioctl,
837 #ifdef CONFIG_COMPAT
838         .compat_ioctl   = vhost_vsock_dev_compat_ioctl,
839 #endif
840 };
841
842 static struct miscdevice vhost_vsock_misc = {
843         .minor = VHOST_VSOCK_MINOR,
844         .name = "vhost-vsock",
845         .fops = &vhost_vsock_fops,
846 };
847
848 static int __init vhost_vsock_init(void)
849 {
850         int ret;
851
852         ret = vsock_core_init(&vhost_transport.transport);
853         if (ret < 0)
854                 return ret;
855         return misc_register(&vhost_vsock_misc);
856 };
857
858 static void __exit vhost_vsock_exit(void)
859 {
860         misc_deregister(&vhost_vsock_misc);
861         vsock_core_exit();
862 };
863
864 module_init(vhost_vsock_init);
865 module_exit(vhost_vsock_exit);
866 MODULE_LICENSE("GPL v2");
867 MODULE_AUTHOR("Asias He");
868 MODULE_DESCRIPTION("vhost transport for vsock ");
869 MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
870 MODULE_ALIAS("devname:vhost-vsock");