1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
3  * All rights reserved.
4  */
5
6 /* This driver lives in an s-Par partition and registers with the visorbus
7  * driver for ethernet IO channels. It creates netdev devices and forwards
8  * transmits to the IO channel and accepts receives from the IO Partition
9  * via the IO channel.
10  */
11
12 #include <linux/debugfs.h>
13 #include <linux/etherdevice.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/kthread.h>
17 #include <linux/skbuff.h>
18 #include <linux/rtnetlink.h>
19 #include <linux/visorbus.h>
20
21 #include "iochannel.h"
22
23 #define VISORNIC_INFINITE_RSP_WAIT 0
24
25 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
26  *         = 163840 bytes
27  */
28 #define MAX_BUF 163840
29 #define NAPI_WEIGHT 64
30
31 /* GUIDs for the vNIC channel type supported by this driver. */
32 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
33 #define VISOR_VNIC_CHANNEL_GUID \
34         GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
35                 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
36 #define VISOR_VNIC_CHANNEL_GUID_STR \
37         "8cd5994d-c58e-11da-95a9-00e08161165f"
38
39 static struct visor_channeltype_descriptor visornic_channel_types[] = {
40         /* Note that the only channel type we expect to be reported by the
41          * bus driver is the VISOR_VNIC channel.
42          */
43         { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
44           VISOR_VNIC_CHANNEL_VERSIONID },
45         {}
46 };
47 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
48 /* FIXME XXX: This next line of code must be fixed and removed before
49  * acceptance into the 'normal' part of the kernel.  It is only here as a place
50  * holder to get module autoloading functionality working for visorbus.  Code
51  * must be added to scripts/mod/file2alias.c, etc., to get this working
52  * properly.
53  */
54 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
55
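/* Per-device counters for traffic exchanged with the IO Partition over the
 * IO channel; kept in the @chstat member of struct visornic_devdata below.
 */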
56 struct chanstat {
57         unsigned long got_rcv;
58         unsigned long got_enbdisack;
59         unsigned long got_xmit_done;
60         unsigned long xmit_fail;
61         unsigned long sent_enbdis;
62         unsigned long sent_promisc;
63         unsigned long sent_post;
64         unsigned long sent_post_failed;
65         unsigned long sent_xmit;
66         unsigned long reject_count;
67         unsigned long extra_rcvbufs_sent;
68 };
69
70 /* struct visornic_devdata
71  * @enabled:                        0 = disabled, 1 = enabled to receive.
72  * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
73  * @struct *dev:
74  * @struct *netdev:
75  * @struct net_stats:
76  * @interrupt_rcvd:
77  * @rsp_queue:
78  * @struct **rcvbuf:
79  * @incarnation_id:                 incarnation_id lets IOPART know about
80  *                                  re-birth.
81  * @old_flags:                      flags as they were prior to
82  *                                  set_multicast_list.
83  * @usage:                          count of users.
84  * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
85  * @num_rcv_bufs_could_not_alloc:
86  * @num_rcvbuf_in_iovm:
87  * @alloc_failed_in_if_needed_cnt:
88  * @alloc_failed_in_repost_rtn_cnt:
89  * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
90  *                                  - should never hit this.
91  * @upper_threshold_net_xmits:      high water mark for calling
92  *                                  netif_stop_queue().
93  * @lower_threshold_net_xmits:      low water mark for calling
94  *                                  netif_wake_queue().
95  * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
96  *                                  sent to the IOPART end.
97  * @server_down_complete_func:
98  * @struct timeout_reset:
99  * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
100  *                                  buffers.
101  * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
102  *                                  active xmit at a time.
103  * @server_down:                    IOPART is down.
104  * @server_change_state:            Processing SERVER_CHANGESTATE msg.
105  * @going_away:                     device is being torn down.
106  * @struct *eth_debugfs_dir:
107  * @interrupts_rcvd:
108  * @interrupts_notme:
109  * @interrupts_disabled:
110  * @busy_cnt:
111  * @priv_lock:                      spinlock to access devdata structures.
112  * @flow_control_upper_hits:
113  * @flow_control_lower_hits:
114  * @n_rcv0:                         # rcvs of 0 buffers.
115  * @n_rcv1:                         # rcvs of 1 buffers.
116  * @n_rcv2:                         # rcvs of 2 buffers.
117  * @n_rcvx:                         # rcvs of >2 buffers.
118  * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
119  * @repost_found_skb_cnt:           # of found the skb.
120  * @n_repost_deficit:               # of lost rcv buffers.
121  * @bad_rcv_buf:                    # of unknown rcv skb not freed.
122  * @n_rcv_packets_not_accepted:     # bogus rcv packets.
123  * @queuefullmsg_logged:
124  * @struct chstat:
125  * @struct irq_poll_timer:
126  * @struct napi:
127  * @struct cmdrsp:
128  */
129 struct visornic_devdata {
130         unsigned short enabled;
131         unsigned short enab_dis_acked;
132
133         struct visor_device *dev;
134         struct net_device *netdev;
135         struct net_device_stats net_stats;
136         atomic_t interrupt_rcvd;
137         wait_queue_head_t rsp_queue;
138         struct sk_buff **rcvbuf;
139         u64 incarnation_id;
140         unsigned short old_flags;
141         atomic_t usage;
142
143         int num_rcv_bufs;
144         int num_rcv_bufs_could_not_alloc;
145         atomic_t num_rcvbuf_in_iovm;
146         unsigned long alloc_failed_in_if_needed_cnt;
147         unsigned long alloc_failed_in_repost_rtn_cnt;
148
149         unsigned long max_outstanding_net_xmits;
150         unsigned long upper_threshold_net_xmits;
151         unsigned long lower_threshold_net_xmits;
152         struct sk_buff_head xmitbufhead;
153
154         visorbus_state_complete_func server_down_complete_func;
155         struct work_struct timeout_reset;
156         struct uiscmdrsp *cmdrsp_rcv;
157         struct uiscmdrsp *xmit_cmdrsp;
158         bool server_down;
159         bool server_change_state;
160         bool going_away;
161         struct dentry *eth_debugfs_dir;
162         u64 interrupts_rcvd;
163         u64 interrupts_notme;
164         u64 interrupts_disabled;
165         u64 busy_cnt;
166         /* spinlock to access devdata structures. */
167         spinlock_t priv_lock;
168
169         /* flow control counter */
170         u64 flow_control_upper_hits;
171         u64 flow_control_lower_hits;
172
173         /* debug counters */
174         unsigned long n_rcv0;
175         unsigned long n_rcv1;
176         unsigned long n_rcv2;
177         unsigned long n_rcvx;
178         unsigned long found_repost_rcvbuf_cnt;
179         unsigned long repost_found_skb_cnt;
180         unsigned long n_repost_deficit;
181         unsigned long bad_rcv_buf;
182         unsigned long n_rcv_packets_not_accepted;
183
184         int queuefullmsg_logged;
185         struct chanstat chstat;
186         struct timer_list irq_poll_timer;
187         struct napi_struct napi;
188         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
189 };
190
191 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
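/* Illustrative example (values chosen here for clarity, assuming
 * PI_PAGE_SIZE == 4096): an input fragment with inp_off == 3000 and
 * inp_len == 6000 spans three pages, so it is split into three entries of
 * 1096, 4096 and 808 bytes at page offsets 3000, 0 and 0 respectively.
 */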
192 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
193                                 u16 index, u16 max_pi_arr_entries,
194                                 struct phys_info pi_arr[])
195 {
196         u16 i, len, firstlen;
197
198         firstlen = PI_PAGE_SIZE - inp_off;
199         if (inp_len <= firstlen) {
200                 /* The input entry spans only one page - add as is. */
201                 if (index >= max_pi_arr_entries)
202                         return 0;
203                 pi_arr[index].pi_pfn = inp_pfn;
204                 pi_arr[index].pi_off = (u16)inp_off;
205                 pi_arr[index].pi_len = (u16)inp_len;
206                 return index + 1;
207         }
208
209         /* This entry spans multiple pages. */
210         for (len = inp_len, i = 0; len;
211                 len -= pi_arr[index + i].pi_len, i++) {
212                 if (index + i >= max_pi_arr_entries)
213                         return 0;
214                 pi_arr[index + i].pi_pfn = inp_pfn + i;
215                 if (i == 0) {
216                         pi_arr[index].pi_off = inp_off;
217                         pi_arr[index].pi_len = firstlen;
218                 } else {
219                         pi_arr[index + i].pi_off = 0;
220                         pi_arr[index + i].pi_len = min_t(u16, len,
221                                                          PI_PAGE_SIZE);
222                 }
223         }
224         return index + i;
225 }
226
227 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
228  *                                 array that the IOPART understands
229  * @skb:          Skbuff that we are pulling the frags from.
230  * @firstfraglen: Length of first fragment in skb.
231  * @frags_max:    Max len of frags array.
232  * @frags:        Frags array filled in on output.
233  *
234  * Return: Positive integer indicating number of entries filled in frags on
235  *         success, negative integer on error.
236  */
237 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
238                                          unsigned int firstfraglen,
239                                          unsigned int frags_max,
240                                          struct phys_info frags[])
241 {
242         unsigned int count = 0, frag, size, offset = 0, numfrags;
243         unsigned int total_count;
244
245         numfrags = skb_shinfo(skb)->nr_frags;
246
247         /* Compute the number of fragments this skb has, and if it's more than
248          * the frag array can hold, linearize the skb
249          */
250         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
251         if (firstfraglen % PI_PAGE_SIZE)
252                 total_count++;
253
254         if (total_count > frags_max) {
255                 if (skb_linearize(skb))
256                         return -EINVAL;
257                 numfrags = skb_shinfo(skb)->nr_frags;
258                 firstfraglen = 0;
259         }
260
261         while (firstfraglen) {
262                 if (count == frags_max)
263                         return -EINVAL;
264
265                 frags[count].pi_pfn =
266                         page_to_pfn(virt_to_page(skb->data + offset));
267                 frags[count].pi_off =
268                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
269                 size = min_t(unsigned int, firstfraglen,
270                              PI_PAGE_SIZE - frags[count].pi_off);
271
272                 /* can take smallest of firstfraglen (what's left) OR
273                  * bytes left in the page
274                  */
275                 frags[count].pi_len = size;
276                 firstfraglen -= size;
277                 offset += size;
278                 count++;
279         }
280         if (numfrags) {
281                 if ((count + numfrags) > frags_max)
282                         return -EINVAL;
283
284                 for (frag = 0; frag < numfrags; frag++) {
285                         count = add_physinfo_entries(page_to_pfn(
286                                   skb_frag_page(&skb_shinfo(skb)->frags[frag])),
287                                   skb_shinfo(skb)->frags[frag].page_offset,
288                                   skb_shinfo(skb)->frags[frag].size, count,
289                                   frags_max, frags);
290                         /* add_physinfo_entries only returns
291                          * zero if the frags array is out of room
292                          * That should never happen because we
293                          * fail above, if count+numfrags > frags_max.
294                          */
295                         if (!count)
296                                 return -EINVAL;
297                 }
298         }
299         if (skb_shinfo(skb)->frag_list) {
300                 struct sk_buff *skbinlist;
301                 int c;
302
303                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
304                      skbinlist = skbinlist->next) {
305                         c = visor_copy_fragsinfo_from_skb(skbinlist,
306                                                           skbinlist->len -
307                                                           skbinlist->data_len,
308                                                           frags_max - count,
309                                                           &frags[count]);
310                         if (c < 0)
311                                 return c;
312                         count += c;
313                 }
314         }
315         return count;
316 }
317
318 static ssize_t enable_ints_write(struct file *file,
319                                  const char __user *buffer,
320                                  size_t count, loff_t *ppos)
321 {
322         /* Don't want to break ABI here by having a debugfs
323          * file that no longer exists or is no longer writable, so
324          * let's just make this a vestigial function
325          */
326         return count;
327 }
328
329 static const struct file_operations debugfs_enable_ints_fops = {
330         .write = enable_ints_write,
331 };
332
333 /* visornic_serverdown_complete - pause device following IOPART going down
334  * @devdata: Device managed by IOPART.
335  *
336  * The IO partition has gone down, and we need to do some cleanup for when it
337  * comes back. Treat the IO partition as the link being down.
338  */
339 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
340 {
341         struct net_device *netdev = devdata->netdev;
342
343         /* Stop polling for interrupts */
344         del_timer_sync(&devdata->irq_poll_timer);
345
346         rtnl_lock();
347         dev_close(netdev);
348         rtnl_unlock();
349
350         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
351         devdata->chstat.sent_xmit = 0;
352         devdata->chstat.got_xmit_done = 0;
353
354         if (devdata->server_down_complete_func)
355                 (*devdata->server_down_complete_func)(devdata->dev, 0);
356
357         devdata->server_down = true;
358         devdata->server_change_state = false;
359         devdata->server_down_complete_func = NULL;
360 }
361
362 /* visornic_serverdown - Command has notified us that IOPART is down
363  * @devdata:       Device managed by IOPART.
364  * @complete_func: Function to call when finished.
365  *
366  * Schedule the work needed to handle the server down request. Make sure we
367  * haven't already handled the server change state event.
368  *
369  * Return: 0 if we scheduled the work, negative integer on error.
370  */
371 static int visornic_serverdown(struct visornic_devdata *devdata,
372                                visorbus_state_complete_func complete_func)
373 {
374         unsigned long flags;
375         int err;
376
377         spin_lock_irqsave(&devdata->priv_lock, flags);
378         if (devdata->server_change_state) {
379                 dev_dbg(&devdata->dev->device, "%s changing state\n",
380                         __func__);
381                 err = -EINVAL;
382                 goto err_unlock;
383         }
384         if (devdata->server_down) {
385                 dev_dbg(&devdata->dev->device, "%s already down\n",
386                         __func__);
387                 err = -EINVAL;
388                 goto err_unlock;
389         }
390         if (devdata->going_away) {
391                 dev_dbg(&devdata->dev->device,
392                         "%s aborting because device removal pending\n",
393                         __func__);
394                 err = -ENODEV;
395                 goto err_unlock;
396         }
397         devdata->server_change_state = true;
398         devdata->server_down_complete_func = complete_func;
399         spin_unlock_irqrestore(&devdata->priv_lock, flags);
400
401         visornic_serverdown_complete(devdata);
402         return 0;
403
404 err_unlock:
405         spin_unlock_irqrestore(&devdata->priv_lock, flags);
406         return err;
407 }
408
409 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
410  * @netdev: Network adapter the rcv bufs are attached to.
411  *
412  * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
413  * so that it can write rcv data into our memory space.
414  *
415  * Return: Pointer to sk_buff.
416  */
417 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
418 {
419         struct sk_buff *skb;
420
421         /* NOTE: the first fragment in each rcv buffer is pointed to by
422          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
423          * in length, so the first frag is large enough to hold 1514.
424          */
425         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
426         if (!skb)
427                 return NULL;
428         skb->dev = netdev;
429         /* current value of mtu doesn't come into play here; large
430          * packets will just end up using multiple rcv buffers all of
431          * packets will just end up using multiple rcv buffers, all of
432          * the same size.
433         skb->len = RCVPOST_BUF_SIZE;
434         /* alloc_skb already zeroes this; set it explicitly for clarity. */
435         skb->data_len = 0;
436         return skb;
437 }
438
439 /* post_skb - post a skb to the IO Partition
440  * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
441  * @devdata: visornic_devdata to post the skb to.
442  * @skb:     Skb to give to the IO partition.
443  *
444  * Return: 0 on success, negative integer on error.
445  */
446 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
447                     struct sk_buff *skb)
448 {
449         int err;
450
451         cmdrsp->net.buf = skb;
452         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
453         cmdrsp->net.rcvpost.frag.pi_off =
454                 (unsigned long)skb->data & PI_PAGE_MASK;
455         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
456         cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
457
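        /* The posted receive buffer is described by a single phys_info frag
         * (one pfn), so it must fit entirely within one page; reject a buffer
         * that would straddle a page boundary.
         */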
458         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
459                 return -EINVAL;
460
461         cmdrsp->net.type = NET_RCV_POST;
462         cmdrsp->cmdtype = CMD_NET_TYPE;
463         err = visorchannel_signalinsert(devdata->dev->visorchannel,
464                                         IOCHAN_TO_IOPART,
465                                         cmdrsp);
466         if (err) {
467                 devdata->chstat.sent_post_failed++;
468                 return err;
469         }
470
471         atomic_inc(&devdata->num_rcvbuf_in_iovm);
472         devdata->chstat.sent_post++;
473         return 0;
474 }
475
476 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
477  * @netdev:  Netdevice we are enabling/disabling, used as context return value.
478  * @state:   Enable = 1/disable = 0.
479  * @devdata: Visornic device we are enabling/disabling.
480  *
481  * Send the enable/disable message to the IO Partition.
482  *
483  * Return: 0 on success, negative integer on error.
484  */
485 static int send_enbdis(struct net_device *netdev, int state,
486                        struct visornic_devdata *devdata)
487 {
488         int err;
489
490         devdata->cmdrsp_rcv->net.enbdis.enable = state;
491         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
492         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
493         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
494         err = visorchannel_signalinsert(devdata->dev->visorchannel,
495                                         IOCHAN_TO_IOPART,
496                                         devdata->cmdrsp_rcv);
497         if (err)
498                 return err;
499         devdata->chstat.sent_enbdis++;
500         return 0;
501 }
502
503 /* visornic_disable_with_timeout - disable network adapter
504  * @netdev:  netdevice to disable.
505  * @timeout: Timeout to wait for disable.
506  *
507  * Disable the network adapter and inform the IO Partition that we are disabled.
508  * Reclaim memory from rcv bufs.
509  *
510  * Return: 0 on success, negative integer if the IO Partition fails to respond.
511  */
512 static int visornic_disable_with_timeout(struct net_device *netdev,
513                                          const int timeout)
514 {
515         struct visornic_devdata *devdata = netdev_priv(netdev);
516         int i;
517         unsigned long flags;
518         int wait = 0;
519         int err;
520
521         /* send a msg telling the other end we are stopping incoming pkts */
522         spin_lock_irqsave(&devdata->priv_lock, flags);
523         devdata->enabled = 0;
524         /* must wait for ack */
525         devdata->enab_dis_acked = 0;
526         spin_unlock_irqrestore(&devdata->priv_lock, flags);
527
528         /* send disable and wait for ack -- don't hold lock when sending
529          * disable because if the queue is full, insert might sleep.
530          * If an error occurs, don't wait for the timeout.
531          */
532         err = send_enbdis(netdev, 0, devdata);
533         if (err)
534                 return err;
535
536         /* wait for ack to arrive before we try to free rcv buffers
537          * NOTE: the other end automatically unposts the rcv buffers
538          * when it gets a disable.
539          */
540         spin_lock_irqsave(&devdata->priv_lock, flags);
541         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
542                (wait < timeout)) {
543                 if (devdata->enab_dis_acked)
544                         break;
545                 if (devdata->server_down || devdata->server_change_state) {
546                         dev_dbg(&netdev->dev, "%s server went away\n",
547                                 __func__);
548                         break;
549                 }
550                 set_current_state(TASK_INTERRUPTIBLE);
551                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
552                 wait += schedule_timeout(msecs_to_jiffies(10));
553                 spin_lock_irqsave(&devdata->priv_lock, flags);
554         }
555
556         /* Wait for usage to go to 1 (no other users) before freeing
557          * rcv buffers
558          */
559         if (atomic_read(&devdata->usage) > 1) {
560                 while (1) {
561                         set_current_state(TASK_INTERRUPTIBLE);
562                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
563                         schedule_timeout(msecs_to_jiffies(10));
564                         spin_lock_irqsave(&devdata->priv_lock, flags);
565                         if (atomic_read(&devdata->usage) == 1)
566                                 break;
567                 }
568         }
569         /* we've set enabled to 0, so we can give up the lock. */
570         spin_unlock_irqrestore(&devdata->priv_lock, flags);
571
572         /* stop the transmit queue so nothing more can be transmitted */
573         netif_stop_queue(netdev);
574
575         napi_disable(&devdata->napi);
576
577         skb_queue_purge(&devdata->xmitbufhead);
578
579         /* Free rcv buffers - other end has automatically unposted them on
580          * disable
581          */
582         for (i = 0; i < devdata->num_rcv_bufs; i++) {
583                 if (devdata->rcvbuf[i]) {
584                         kfree_skb(devdata->rcvbuf[i]);
585                         devdata->rcvbuf[i] = NULL;
586                 }
587         }
588
589         return 0;
590 }
591
592 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
593  * @netdev:  struct netdevice.
594  * @devdata: visornic_devdata.
595  *
596  * Allocate rcv buffers and post them to the IO Partition.
597  *
598  * Return: 0 on success, negative integer on failure.
599  */
600 static int init_rcv_bufs(struct net_device *netdev,
601                          struct visornic_devdata *devdata)
602 {
603         int i, j, count, err;
604
605         /* allocate a fixed number of receive buffers to post to uisnic;
606          * post the receive buffers after we've allocated the required amount
607          */
608         for (i = 0; i < devdata->num_rcv_bufs; i++) {
609                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
610                 /* if we failed to allocate one let us stop */
611                 if (!devdata->rcvbuf[i])
612                         break;
613         }
614         /* couldn't even allocate one -- bail out */
615         if (i == 0)
616                 return -ENOMEM;
617         count = i;
618
619         /* Ensure we can alloc 2/3rd of the requested number of buffers.
620          * 2/3 is an arbitrary choice; used also in ndis init.c
621          */
622         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
623                 /* free receive buffers we did alloc and then bail out */
624                 for (i = 0; i < count; i++) {
625                         kfree_skb(devdata->rcvbuf[i]);
626                         devdata->rcvbuf[i] = NULL;
627                 }
628                 return -ENOMEM;
629         }
630
631         /* post receive buffers to receive incoming input - without holding
632          * lock - we've not enabled nor started the queue so there shouldn't
633          * be any rcv or xmit activity
634          */
635         for (i = 0; i < count; i++) {
636                 err = post_skb(devdata->cmdrsp_rcv, devdata,
637                                devdata->rcvbuf[i]);
638                 if (!err)
639                         continue;
640
641                 /* Error handling -
642                  * If we posted at least one skb, we should return success,
643                  * but need to free the resources that we have not successfully
644                  * posted.
645                  */
646                 for (j = i; j < count; j++) {
647                         kfree_skb(devdata->rcvbuf[j]);
648                         devdata->rcvbuf[j] = NULL;
649                 }
650                 if (i == 0)
651                         return err;
652                 break;
653         }
654
655         return 0;
656 }
657
658 /* visornic_enable_with_timeout - send enable to IO Partition
659  * @netdev:  struct net_device.
660  * @timeout: Time to wait for the ACK from the enable.
661  *
662  * Send an enable to the IOVM, initialize, and post receive buffers to the
663  * IOVM. Timeout is defined in msecs (a timeout of 0 specifies an infinite wait).
664  *
665  * Return: 0 on success, negative integer on failure.
666  */
667 static int visornic_enable_with_timeout(struct net_device *netdev,
668                                         const int timeout)
669 {
670         int err = 0;
671         struct visornic_devdata *devdata = netdev_priv(netdev);
672         unsigned long flags;
673         int wait = 0;
674
675         napi_enable(&devdata->napi);
676
677         /* NOTE: the other end automatically unposts the rcv buffers when it
678          * gets a disable.
679          */
680         err = init_rcv_bufs(netdev, devdata);
681         if (err < 0) {
682                 dev_err(&netdev->dev,
683                         "%s failed to init rcv bufs\n", __func__);
684                 return err;
685         }
686
687         spin_lock_irqsave(&devdata->priv_lock, flags);
688         devdata->enabled = 1;
689         devdata->enab_dis_acked = 0;
690
691         /* now we're ready, let's send an ENB to uisnic but until we get
692          * an ACK back from uisnic, we'll drop the packets
693          */
694         devdata->n_rcv_packets_not_accepted = 0;
695         spin_unlock_irqrestore(&devdata->priv_lock, flags);
696
697         /* send enable and wait for ack -- don't hold lock when sending enable
698          * because if the queue is full, insert might sleep. If an error
699          * occurs error out.
700          */
701         err = send_enbdis(netdev, 1, devdata);
702         if (err)
703                 return err;
704
705         spin_lock_irqsave(&devdata->priv_lock, flags);
706         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
707                (wait < timeout)) {
708                 if (devdata->enab_dis_acked)
709                         break;
710                 if (devdata->server_down || devdata->server_change_state) {
711                         dev_dbg(&netdev->dev, "%s server went away\n",
712                                 __func__);
713                         break;
714                 }
715                 set_current_state(TASK_INTERRUPTIBLE);
716                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
717                 wait += schedule_timeout(msecs_to_jiffies(10));
718                 spin_lock_irqsave(&devdata->priv_lock, flags);
719         }
720
721         spin_unlock_irqrestore(&devdata->priv_lock, flags);
722
723         if (!devdata->enab_dis_acked) {
724                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
725                 return -EIO;
726         }
727
728         netif_start_queue(netdev);
729         return 0;
730 }
731
732 /* visornic_timeout_reset - handle xmit timeout resets
733  * @work: Work item that scheduled the work.
734  *
735  * Transmit timeouts are typically handled by resetting the device for our
736  * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
737  * respond, we will trigger a serverdown.
738  */
739 static void visornic_timeout_reset(struct work_struct *work)
740 {
741         struct visornic_devdata *devdata;
742         struct net_device *netdev;
743         int response = 0;
744
745         devdata = container_of(work, struct visornic_devdata, timeout_reset);
746         netdev = devdata->netdev;
747
748         rtnl_lock();
749         if (!netif_running(netdev)) {
750                 rtnl_unlock();
751                 return;
752         }
753
754         response = visornic_disable_with_timeout(netdev,
755                                                  VISORNIC_INFINITE_RSP_WAIT);
756         if (response)
757                 goto call_serverdown;
758
759         response = visornic_enable_with_timeout(netdev,
760                                                 VISORNIC_INFINITE_RSP_WAIT);
761         if (response)
762                 goto call_serverdown;
763
764         rtnl_unlock();
765
766         return;
767
768 call_serverdown:
769         visornic_serverdown(devdata, NULL);
770         rtnl_unlock();
771 }
772
773 /* visornic_open - enable the visornic device and mark the queue started
774  * @netdev: netdevice to start.
775  *
776  * Enable the device and start the transmit queue.
777  *
778  * Return: 0 on success.
779  */
780 static int visornic_open(struct net_device *netdev)
781 {
782         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
783         return 0;
784 }
785
786 /* visornic_close - disables the visornic device and stops the queues
787  * @netdev: netdevice to stop.
788  *
789  * Disable the device and stop the transmit queue.
790  *
791  * Return: 0 on success.
792  */
793 static int visornic_close(struct net_device *netdev)
794 {
795         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
796         return 0;
797 }
798
799 /* devdata_xmits_outstanding - compute outstanding xmits
800  * @devdata: visornic_devdata for device
801  *
802  * Return: Long integer representing the number of outstanding xmits.
803  */
804 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
805 {
806         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
807                 return devdata->chstat.sent_xmit -
808                         devdata->chstat.got_xmit_done;
809         return (ULONG_MAX - devdata->chstat.got_xmit_done
810                 + devdata->chstat.sent_xmit + 1);
811 }
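/* Worked example of the wraparound branch above (hypothetical counter values):
 * if sent_xmit has wrapped around to 5 while got_xmit_done is ULONG_MAX - 2,
 * the outstanding count is ULONG_MAX - (ULONG_MAX - 2) + 5 + 1 = 8.
 */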
812
813 /* vnic_hit_high_watermark
814  * @devdata:        Indicates visornic device we are checking.
815  * @high_watermark: Max num of unacked xmits we will tolerate before we will
816  *                  start throttling.
817  *
818  * Return: True iff the number of unacked xmits sent to the IO Partition is >=
819  *         high_watermark. False otherwise.
820  */
821 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
822                                     ulong high_watermark)
823 {
824         return (devdata_xmits_outstanding(devdata) >= high_watermark);
825 }
826
827 /* vnic_hit_low_watermark
828  * @devdata:       Indicates visornic device we are checking.
829  * @low_watermark: We will wait until the num of unacked xmits drops to this
830  *                 value or lower before we start transmitting again.
831  *
832  * Return: True iff the number of unacked xmits sent to the IO Partition is <=
833  *         low_watermark.
834  */
835 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
836                                    ulong low_watermark)
837 {
838         return (devdata_xmits_outstanding(devdata) <= low_watermark);
839 }
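/* Together these watermarks implement simple flow control toward the IO
 * Partition: visornic_xmit() stops the queue with netif_stop_queue() once
 * outstanding xmits reach upper_threshold_net_xmits, and the queue is woken
 * again with netif_wake_queue() once they drain to lower_threshold_net_xmits.
 */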
840
841 /* visornic_xmit - send a packet to the IO Partition
842  * @skb:    Packet to be sent.
843  * @netdev: Net device the packet is being sent from.
844  *
845  * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
846  * the XMIT command to the IO Partition for processing. This function is
847  * protected from concurrent calls by a spinlock xmit_lock in the net_device
848  * struct. As soon as the function returns, it can be called again.
849  *
850  * Return: NETDEV_TX_OK.
851  */
852 static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
853 {
854         struct visornic_devdata *devdata;
855         int len, firstfraglen, padlen;
856         struct uiscmdrsp *cmdrsp = NULL;
857         unsigned long flags;
858         int err;
859
860         devdata = netdev_priv(netdev);
861         spin_lock_irqsave(&devdata->priv_lock, flags);
862
863         if (netif_queue_stopped(netdev) || devdata->server_down ||
864             devdata->server_change_state) {
865                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
866                 devdata->busy_cnt++;
867                 dev_dbg(&netdev->dev,
868                         "%s busy - queue stopped\n", __func__);
869                 kfree_skb(skb);
870                 return NETDEV_TX_OK;
871         }
872
873         /* sk_buff struct is used to host network data throughout all the
874          * linux network subsystems
875          */
876         len = skb->len;
877
878         /* skb->len is the FULL length of data (including fragmentary portion)
879          * skb->data_len is the length of the fragment portion in frags
880          * skb->len - skb->data_len is size of the 1st fragment in skb->data
881          * calculate the length of the first fragment that skb->data is
882          * pointing to
883          */
884         firstfraglen = skb->len - skb->data_len;
885         if (firstfraglen < ETH_HLEN) {
886                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
887                 devdata->busy_cnt++;
888                 dev_err(&netdev->dev,
889                         "%s busy - first frag too small (%d)\n",
890                         __func__, firstfraglen);
891                 kfree_skb(skb);
892                 return NETDEV_TX_OK;
893         }
894
895         if (len < ETH_MIN_PACKET_SIZE &&
896             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
897                 /* pad the packet out to minimum size */
898                 padlen = ETH_MIN_PACKET_SIZE - len;
899                 memset(&skb->data[len], 0, padlen);
900                 skb->tail += padlen;
901                 skb->len += padlen;
902                 len += padlen;
903                 firstfraglen += padlen;
904         }
905
906         cmdrsp = devdata->xmit_cmdrsp;
907         /* clear cmdrsp */
908         memset(cmdrsp, 0, SIZEOF_CMDRSP);
909         cmdrsp->net.type = NET_XMIT;
910         cmdrsp->cmdtype = CMD_NET_TYPE;
911
912         /* save the pointer to skb -- we'll need it for completion */
913         cmdrsp->net.buf = skb;
914
915         if (vnic_hit_high_watermark(devdata,
916                                     devdata->max_outstanding_net_xmits)) {
917                 /* extra NET_XMITs queued over to IOVM - need to wait */
918                 devdata->chstat.reject_count++;
919                 if (!devdata->queuefullmsg_logged &&
920                     ((devdata->chstat.reject_count & 0x3ff) == 1))
921                         devdata->queuefullmsg_logged = 1;
922                 netif_stop_queue(netdev);
923                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
924                 devdata->busy_cnt++;
925                 dev_dbg(&netdev->dev,
926                         "%s busy - waiting for iovm to catch up\n",
927                         __func__);
928                 kfree_skb(skb);
929                 return NETDEV_TX_OK;
930         }
931         if (devdata->queuefullmsg_logged)
932                 devdata->queuefullmsg_logged = 0;
933
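        /* Pass any checksum information along to the IO Partition; the
         * hrawoffv/nhrawoffv bits below flag which of the raw transport- and
         * network-header offsets are valid.
         */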
934         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
935                 cmdrsp->net.xmt.lincsum.valid = 1;
936                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
937                 if (skb_transport_header(skb) > skb->data) {
938                         cmdrsp->net.xmt.lincsum.hrawoff =
939                                 skb_transport_header(skb) - skb->data;
940                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
941                 }
942                 if (skb_network_header(skb) > skb->data) {
943                         cmdrsp->net.xmt.lincsum.nhrawoff =
944                                 skb_network_header(skb) - skb->data;
945                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
946                 }
947                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
948         } else {
949                 cmdrsp->net.xmt.lincsum.valid = 0;
950         }
951
952         /* save off the length of the entire data packet */
953         cmdrsp->net.xmt.len = len;
954
955         /* copy ethernet header from first frag into cmdrsp
956          * - everything else will be passed in frags & DMA'ed
957          */
958         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
959
960         /* copy frags info - from skb->data we need to only provide access
961          * beyond eth header
962          */
963         cmdrsp->net.xmt.num_frags =
964                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
965                                               MAX_PHYS_INFO,
966                                               cmdrsp->net.xmt.frags);
967         if (cmdrsp->net.xmt.num_frags < 0) {
968                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
969                 devdata->busy_cnt++;
970                 dev_err(&netdev->dev,
971                         "%s busy - copy frags failed\n", __func__);
972                 kfree_skb(skb);
973                 return NETDEV_TX_OK;
974         }
975
976         err = visorchannel_signalinsert(devdata->dev->visorchannel,
977                                         IOCHAN_TO_IOPART, cmdrsp);
978         if (err) {
979                 netif_stop_queue(netdev);
980                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
981                 devdata->busy_cnt++;
982                 dev_dbg(&netdev->dev,
983                         "%s busy - signalinsert failed\n", __func__);
984                 kfree_skb(skb);
985                 return NETDEV_TX_OK;
986         }
987
988         /* Track the skbs that have been sent to the IOVM for XMIT */
989         skb_queue_head(&devdata->xmitbufhead, skb);
990
991         /* update xmt stats */
992         devdata->net_stats.tx_packets++;
993         devdata->net_stats.tx_bytes += skb->len;
994         devdata->chstat.sent_xmit++;
995
996         /* check if we have hit the high watermark for netif_stop_queue() */
997         if (vnic_hit_high_watermark(devdata,
998                                     devdata->upper_threshold_net_xmits)) {
999                 /* extra NET_XMITs queued over to IOVM - need to wait */
1000                 /* stop queue - call netif_wake_queue() after lower threshold */
1001                 netif_stop_queue(netdev);
1002                 dev_dbg(&netdev->dev,
1003                         "%s busy - invoking iovm flow control\n",
1004                         __func__);
1005                 devdata->flow_control_upper_hits++;
1006         }
1007         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1008
1009         /* skb will be freed when we get back NET_XMIT_DONE */
1010         return NETDEV_TX_OK;
1011 }
1012
1013 /* visornic_get_stats - returns net_stats of the visornic device
1014  * @netdev: netdevice.
1015  *
1016  * Return: Pointer to the net_device_stats struct for the device.
1017  */
1018 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1019 {
1020         struct visornic_devdata *devdata = netdev_priv(netdev);
1021
1022         return &devdata->net_stats;
1023 }
1024
1025 /* visornic_change_mtu - changes mtu of device
1026  * @netdev: netdevice.
1027  * @new_mtu: Value of new mtu.
1028  *
1029  * The device's MTU cannot be changed by the system; it must be changed via a
1030  * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1031  * for everything to work. Currently not supported.
1032  *
1033  * Return: -EINVAL.
1034  */
1035 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1036 {
1037         return -EINVAL;
1038 }
1039
1040 /* visornic_set_multi - set visornic device flags
1041  * @netdev: netdevice.
1042  *
1043  * The only flag we currently support is IFF_PROMISC.
1044  */
1045 static void visornic_set_multi(struct net_device *netdev)
1046 {
1047         struct uiscmdrsp *cmdrsp;
1048         struct visornic_devdata *devdata = netdev_priv(netdev);
1049         int err = 0;
1050
1051         if (devdata->old_flags == netdev->flags)
1052                 return;
1053
1054         if ((netdev->flags & IFF_PROMISC) ==
1055             (devdata->old_flags & IFF_PROMISC))
1056                 goto out_save_flags;
1057
1058         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1059         if (!cmdrsp)
1060                 return;
1061         cmdrsp->cmdtype = CMD_NET_TYPE;
1062         cmdrsp->net.type = NET_RCV_PROMISC;
1063         cmdrsp->net.enbdis.context = netdev;
1064         cmdrsp->net.enbdis.enable =
1065                 netdev->flags & IFF_PROMISC;
1066         err = visorchannel_signalinsert(devdata->dev->visorchannel,
1067                                         IOCHAN_TO_IOPART,
1068                                         cmdrsp);
1069         kfree(cmdrsp);
1070         if (err)
1071                 return;
1072
1073 out_save_flags:
1074         devdata->old_flags = netdev->flags;
1075 }
1076
1077 /* visornic_xmit_timeout - request to timeout the xmit
1078  * @netdev: netdevice.
1079  *
1080  * Queue the work and return. Make sure we have not already been informed that
1081  * the IO Partition is gone; if so, we will have already timed-out the xmits.
1082  */
1083 static void visornic_xmit_timeout(struct net_device *netdev)
1084 {
1085         struct visornic_devdata *devdata = netdev_priv(netdev);
1086         unsigned long flags;
1087
1088         spin_lock_irqsave(&devdata->priv_lock, flags);
1089         if (devdata->going_away) {
1090                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1091                 dev_dbg(&devdata->dev->device,
1092                         "%s aborting because device removal pending\n",
1093                         __func__);
1094                 return;
1095         }
1096
1097         /* Ensure that a ServerDown message hasn't been received */
1098         if (!devdata->enabled ||
1099             (devdata->server_down && !devdata->server_change_state)) {
1100                 dev_dbg(&netdev->dev, "%s no processing\n",
1101                         __func__);
1102                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1103                 return;
1104         }
1105         schedule_work(&devdata->timeout_reset);
1106         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1107 }
1108
1109 /* repost_return - repost rcv bufs that have come back
1110  * @cmdrsp: IO channel command struct to post.
1111  * @devdata: Visornic devdata for the device.
1112  * @skb: Socket buffer.
1113  * @netdev: netdevice.
1114  *
1115  * Repost rcv buffers that have been returned to us when we are finished
1116  * with them.
1117  *
1118  * Return: 0 for success, negative integer on error.
1119  */
1120 static int repost_return(struct uiscmdrsp *cmdrsp,
1121                          struct visornic_devdata *devdata,
1122                          struct sk_buff *skb, struct net_device *netdev)
1123 {
1124         struct net_pkt_rcv copy;
1125         int i = 0, cc, numreposted;
1126         int found_skb = 0;
1127         int status = 0;
1128
1129         copy = cmdrsp->net.rcv;
1130         switch (copy.numrcvbufs) {
1131         case 0:
1132                 devdata->n_rcv0++;
1133                 break;
1134         case 1:
1135                 devdata->n_rcv1++;
1136                 break;
1137         case 2:
1138                 devdata->n_rcv2++;
1139                 break;
1140         default:
1141                 devdata->n_rcvx++;
1142                 break;
1143         }
1144         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1145                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1146                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1147                                 continue;
1148
1149                         if ((skb) && devdata->rcvbuf[i] == skb) {
1150                                 devdata->found_repost_rcvbuf_cnt++;
1151                                 found_skb = 1;
1152                                 devdata->repost_found_skb_cnt++;
1153                         }
1154                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1155                         if (!devdata->rcvbuf[i]) {
1156                                 devdata->num_rcv_bufs_could_not_alloc++;
1157                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1158                                 status = -ENOMEM;
1159                                 break;
1160                         }
1161                         status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1162                         if (status) {
1163                                 kfree_skb(devdata->rcvbuf[i]);
1164                                 devdata->rcvbuf[i] = NULL;
1165                                 break;
1166                         }
1167                         numreposted++;
1168                         break;
1169                 }
1170         }
1171         if (numreposted != copy.numrcvbufs) {
1172                 devdata->n_repost_deficit++;
1173                 status = -EINVAL;
1174         }
1175         if (skb) {
1176                 if (found_skb) {
1177                         kfree_skb(skb);
1178                 } else {
1179                         status = -EINVAL;
1180                         devdata->bad_rcv_buf++;
1181                 }
1182         }
1183         return status;
1184 }
1185
1186 /* visornic_rx - handle receive packets coming back from IO Partition
1187  * @cmdrsp: Receive packet returned from IO Partition.
1188  *
1189  * Got a receive packet back from the IO Partition; handle it and send it up
1190  * the stack.
1191  *
1192  * Return: 1 iff an skb was received, otherwise 0.
1193  */
1194 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1195 {
1196         struct visornic_devdata *devdata;
1197         struct sk_buff *skb, *prev, *curr;
1198         struct net_device *netdev;
1199         int cc, currsize, off;
1200         struct ethhdr *eth;
1201         unsigned long flags;
1202
1203         /* post a new rcv buf to the other end using the cmdrsp we have at hand;
1204          * post it without holding the lock - but we'll use the signal lock to
1205          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1206          * is the one we are using to repost, so copy the info we need from it.
1207          */
1208         skb = cmdrsp->net.buf;
1209         netdev = skb->dev;
1210
1211         devdata = netdev_priv(netdev);
1212
1213         spin_lock_irqsave(&devdata->priv_lock, flags);
1214         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1215
1216         /* set length to how much was ACTUALLY received -
1217          * NOTE: rcv_done_len includes actual length of data rcvd
1218          * including ethhdr
1219          */
1220         skb->len = cmdrsp->net.rcv.rcv_done_len;
1221
1222         /* update rcv stats - call it with priv_lock held */
1223         devdata->net_stats.rx_packets++;
1224         devdata->net_stats.rx_bytes += skb->len;
1225
1226         /* test enabled while holding lock */
1227         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1228                 /* don't process it unless we're in enable mode and until
1229                  * we've gotten an ACK saying the other end got our RCV enable
1230                  */
1231                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1232                 repost_return(cmdrsp, devdata, skb, netdev);
1233                 return 0;
1234         }
1235
1236         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1237
1238         /* when the skb was allocated, skb->dev, skb->data, skb->len and
1239          * skb->data_len were set up. AND, the data has already been put into
1240          * the skb (both the first frag and the frags pages).
1241          * NOTE: firstfraglen is the amount of data in skb->data, i.e. that
1242          * which is not in nr_frags or frag_list. This is now simply
1243          * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the
1244          * first frag, set data_len to show the rest, and see if we have to
1245          * chain frag_list.
1246          */
1247         /* do PRECAUTIONARY check */
1248         if (skb->len > RCVPOST_BUF_SIZE) {
1249                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1250                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1251                                 dev_err(&devdata->netdev->dev,
1252                                         "repost_return failed");
1253                         return 0;
1254                 }
1255                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1256                 /* amount in skb->data */
1257                 skb->tail += RCVPOST_BUF_SIZE;
1258                 /* amount that will be in frag_list */
1259                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1260         } else {
1261                 /* data fits in this skb - no chaining - do
1262                  * PRECAUTIONARY check
1263                  */
1264                 /* should be 1 */
1265                 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1266                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1267                                 dev_err(&devdata->netdev->dev,
1268                                         "repost_return failed");
1269                         return 0;
1270                 }
1271                 skb->tail += skb->len;
1272                 /* nothing rcvd in frag_list */
1273                 skb->data_len = 0;
1274         }
1275         off = skb_tail_pointer(skb) - skb->data;
1276
1277         /* off is the amount we bumped tail by in the head skb.
1278          * It is used to calculate the size of each chained skb below,
1279          * and also to index into bufline to continue the copy
1280          * (for chansocktwopc).
1281          * If necessary, chain the rcv skbs together.
1282          * NOTE: index 0 is the same skb as cmdrsp->net.buf; we need to
1283          * chain the rest to that one.
1284          * - do PRECAUTIONARY check
1285          */
1286         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1287                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1288                         dev_err(&devdata->netdev->dev, "repost_return failed");
1289                 return 0;
1290         }
1291
1292         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1293                 /* chain the various rcv buffers into the skb's frag_list. */
1294                 /* Note: off was initialized above  */
1295                 for (cc = 1, prev = NULL;
1296                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1297                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1298                         curr->next = NULL;
1299                         /* start of list- set head */
1300                         if (!prev)
1301                                 skb_shinfo(skb)->frag_list = curr;
1302                         else
1303                                 prev->next = curr;
1304                         prev = curr;
1305
1306                         /* should we set skb->len and skb->data_len for each
1307                          * buffer being chained??? can't hurt!
1308                          */
1309                         currsize = min(skb->len - off,
1310                                        (unsigned int)RCVPOST_BUF_SIZE);
1311                         curr->len = currsize;
1312                         curr->tail += currsize;
1313                         curr->data_len = 0;
1314                         off += currsize;
1315                 }
1316                 /* assert skb->len == off */
1317                 if (skb->len != off) {
1318                         netdev_err(devdata->netdev,
1319                                    "something wrong; skb->len:%d != off:%d\n",
1320                                    skb->len, off);
1321                 }
1322         }
1323
1324         /* set up packet's protocol type using ethernet header - this
1325          * sets up skb->pkt_type & it also PULLS out the eth header
1326          */
1327         skb->protocol = eth_type_trans(skb, netdev);
1328         eth = eth_hdr(skb);
1329         skb->csum = 0;
1330         skb->ip_summed = CHECKSUM_NONE;
1331
1332         do {
1333                 /* accept all packets */
1334                 if (netdev->flags & IFF_PROMISC)
1335                         break;
1336                 if (skb->pkt_type == PACKET_BROADCAST) {
1337                         /* accept all broadcast packets */
1338                         if (netdev->flags & IFF_BROADCAST)
1339                                 break;
1340                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1341                         if ((netdev->flags & IFF_MULTICAST) &&
1342                             (netdev_mc_count(netdev))) {
1343                                 struct netdev_hw_addr *ha;
1344                                 int found_mc = 0;
1345
1346                                 /* only accept multicast packets that we can
1347                                  * find in our multicast address list
1348                                  */
1349                                 netdev_for_each_mc_addr(ha, netdev) {
1350                                         if (ether_addr_equal(eth->h_dest,
1351                                                              ha->addr)) {
1352                                                 found_mc = 1;
1353                                                 break;
1354                                         }
1355                                 }
1356                                 /* accept pkt, dest matches a multicast addr */
1357                                 if (found_mc)
1358                                         break;
1359                         }
1360                 /* accept packet, h_dest must match vnic mac address */
1361                 } else if (skb->pkt_type == PACKET_HOST) {
1362                         break;
1363                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1364                         /* something is not right */
1365                         dev_err(&devdata->netdev->dev,
1366                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1367                                 netdev->name, eth->h_dest, netdev->dev_addr);
1368                 }
1369                 /* drop packet - don't forward it up to OS */
1370                 devdata->n_rcv_packets_not_accepted++;
1371                 repost_return(cmdrsp, devdata, skb, netdev);
1372                 return 0;
1373         } while (0);
1374
1375         netif_receive_skb(skb);
1376         /* netif_receive_skb() returns a value, but in practice most drivers
1377          * ignore it
1378          */
1379
1380         skb = NULL;
1381         /* whether the packet got dropped or handled, the skb is freed by
1382          * kernel code, so we shouldn't free it. but we should repost a
1383          * new rcv buffer.
1384          */
1385         repost_return(cmdrsp, devdata, skb, netdev);
1386         return 1;
1387 }
1388
1389 /* devdata_initialize - initialize devdata structure
1390  * @devdata: visornic_devdata structure to initialize.
1391  * @dev:     visorbus_device it belongs to.
1392  *
1393  * Setup initial values for the visornic, based on channel and default values.
1394  *
1395  * Return: A pointer to the devdata structure.
1396  */
1397 static struct visornic_devdata *devdata_initialize(
1398                                         struct visornic_devdata *devdata,
1399                                         struct visor_device *dev)
1400 {
1401         devdata->dev = dev;
1402         devdata->incarnation_id = get_jiffies_64();
1403         return devdata;
1404 }
1405
1406 /* devdata_release - free up references in devdata
1407  * @devdata: Struct to clean up.
1408  */
1409 static void devdata_release(struct visornic_devdata *devdata)
1410 {
1411         kfree(devdata->rcvbuf);
1412         kfree(devdata->cmdrsp_rcv);
1413         kfree(devdata->xmit_cmdrsp);
1414 }
1415
1416 static const struct net_device_ops visornic_dev_ops = {
1417         .ndo_open = visornic_open,
1418         .ndo_stop = visornic_close,
1419         .ndo_start_xmit = visornic_xmit,
1420         .ndo_get_stats = visornic_get_stats,
1421         .ndo_change_mtu = visornic_change_mtu,
1422         .ndo_tx_timeout = visornic_xmit_timeout,
1423         .ndo_set_rx_mode = visornic_set_multi,
1424 };
1425
1426 /* DebugFS code */
1427 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1428                                  size_t len, loff_t *offset)
1429 {
1430         ssize_t bytes_read = 0;
1431         int str_pos = 0;
1432         struct visornic_devdata *devdata;
1433         struct net_device *dev;
1434         char *vbuf;
1435
1436         if (len > MAX_BUF)
1437                 len = MAX_BUF;
1438         vbuf = kzalloc(len, GFP_KERNEL);
1439         if (!vbuf)
1440                 return -ENOMEM;
1441
1442         /* for each vnic channel dump out channel specific data */
1443         rcu_read_lock();
1444         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1445                 /* Only consider visornic netdevs whose xmit queue is stopped */
1446                 if (dev->netdev_ops != &visornic_dev_ops ||
1447                     (!netif_queue_stopped(dev)))
1448                         continue;
1449
1450                 devdata = netdev_priv(dev);
1451                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1452                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1453                                      dev->name,
1454                                      dev,
1455                                      dev->dev_addr);
1456                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457                                      "VisorNic Dev Info = 0x%p\n", devdata);
1458                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1459                                      " num_rcv_bufs = %d\n",
1460                                      devdata->num_rcv_bufs);
1461                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1462                                      " max_outstanding_net_xmits = %lu\n",
1463                                      devdata->max_outstanding_net_xmits);
1464                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1465                                      " upper_threshold_net_xmits = %lu\n",
1466                                      devdata->upper_threshold_net_xmits);
1467                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1468                                      " lower_threshold_net_xmits = %lu\n",
1469                                      devdata->lower_threshold_net_xmits);
1470                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1471                                      " queuefullmsg_logged = %d\n",
1472                                      devdata->queuefullmsg_logged);
1473                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1474                                      " chstat.got_rcv = %lu\n",
1475                                      devdata->chstat.got_rcv);
1476                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1477                                      " chstat.got_enbdisack = %lu\n",
1478                                      devdata->chstat.got_enbdisack);
1479                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1480                                      " chstat.got_xmit_done = %lu\n",
1481                                      devdata->chstat.got_xmit_done);
1482                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483                                      " chstat.xmit_fail = %lu\n",
1484                                      devdata->chstat.xmit_fail);
1485                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1486                                      " chstat.sent_enbdis = %lu\n",
1487                                      devdata->chstat.sent_enbdis);
1488                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489                                      " chstat.sent_promisc = %lu\n",
1490                                      devdata->chstat.sent_promisc);
1491                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492                                      " chstat.sent_post = %lu\n",
1493                                      devdata->chstat.sent_post);
1494                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495                                      " chstat.sent_post_failed = %lu\n",
1496                                      devdata->chstat.sent_post_failed);
1497                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498                                      " chstat.sent_xmit = %lu\n",
1499                                      devdata->chstat.sent_xmit);
1500                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1501                                      " chstat.reject_count = %lu\n",
1502                                      devdata->chstat.reject_count);
1503                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1504                                      " chstat.extra_rcvbufs_sent = %lu\n",
1505                                      devdata->chstat.extra_rcvbufs_sent);
1506                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1508                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1510                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1512                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1514                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1515                                      " num_rcvbuf_in_iovm = %d\n",
1516                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1517                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1519                                      devdata->alloc_failed_in_if_needed_cnt);
1520                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1521                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1522                                      devdata->alloc_failed_in_repost_rtn_cnt);
1523                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524                  *                   " inner_loop_limit_reached_cnt = %lu\n",
1525                  *                   devdata->inner_loop_limit_reached_cnt);
1526                  */
1527                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1528                                      " found_repost_rcvbuf_cnt = %lu\n",
1529                                      devdata->found_repost_rcvbuf_cnt);
1530                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1531                                      " repost_found_skb_cnt = %lu\n",
1532                                      devdata->repost_found_skb_cnt);
1533                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1534                                      " n_repost_deficit = %lu\n",
1535                                      devdata->n_repost_deficit);
1536                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1537                                      " bad_rcv_buf = %lu\n",
1538                                      devdata->bad_rcv_buf);
1539                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1540                                      " n_rcv_packets_not_accepted = %lu\n",
1541                                      devdata->n_rcv_packets_not_accepted);
1542                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1543                                      " interrupts_rcvd = %llu\n",
1544                                      devdata->interrupts_rcvd);
1545                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1546                                      " interrupts_notme = %llu\n",
1547                                      devdata->interrupts_notme);
1548                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1549                                      " interrupts_disabled = %llu\n",
1550                                      devdata->interrupts_disabled);
1551                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1552                                      " busy_cnt = %llu\n",
1553                                      devdata->busy_cnt);
1554                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1555                                      " flow_control_upper_hits = %llu\n",
1556                                      devdata->flow_control_upper_hits);
1557                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1558                                      " flow_control_lower_hits = %llu\n",
1559                                      devdata->flow_control_lower_hits);
1560                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561                                      " netif_queue = %s\n",
1562                                      netif_queue_stopped(devdata->netdev) ?
1563                                      "stopped" : "running");
1564                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1565                                      " xmits_outstanding = %lu\n",
1566                                      devdata_xmits_outstanding(devdata));
1567         }
1568         rcu_read_unlock();
1569         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1570         kfree(vbuf);
1571         return bytes_read;
1572 }
1573
1574 static struct dentry *visornic_debugfs_dir;
1575 static const struct file_operations debugfs_info_fops = {
1576         .read = info_debugfs_read,
1577 };
1578
1579 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1580  * @devdata: Visornic device.
1581  */
1582 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1583 {
1584         int i;
1585         struct net_device *netdev;
1586         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1587         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1588         int err;
1589
1590         /* don't do this until vnic is marked ready */
1591         if (!(devdata->enabled && devdata->enab_dis_acked))
1592                 return;
1593
1594         netdev = devdata->netdev;
1595         rcv_bufs_allocated = 0;
1596         /* This code tries to avoid getting stuck here forever, while still
1597          * retrying on a later pass if we can't allocate them all this time.
1598          */
1599         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1600         while (cur_num_rcv_bufs_to_alloc > 0) {
1601                 cur_num_rcv_bufs_to_alloc--;
1602                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1603                         if (devdata->rcvbuf[i])
1604                                 continue;
1605                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1606                         if (!devdata->rcvbuf[i]) {
1607                                 devdata->alloc_failed_in_if_needed_cnt++;
1608                                 break;
1609                         }
1610                         rcv_bufs_allocated++;
1611                         err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1612                         if (err) {
1613                                 kfree_skb(devdata->rcvbuf[i]);
1614                                 devdata->rcvbuf[i] = NULL;
1615                                 break;
1616                         }
1617                         devdata->chstat.extra_rcvbufs_sent++;
1618                 }
1619         }
1620         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1621 }
1622
1623 /* drain_resp_queue - drains and ignores all messages from the resp queue
1624  * @cmdrsp:  IO channel command response message.
1625  * @devdata: Visornic device to drain.
1626  */
1627 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1628                              struct visornic_devdata *devdata)
1629 {
1630         while (!visorchannel_signalremove(devdata->dev->visorchannel,
1631                                           IOCHAN_FROM_IOPART,
1632                                           cmdrsp))
1633                 ;
1634 }
1635
1636 /* service_resp_queue - drain the response queue
1637  * @cmdrsp:  IO channel command response message.
1638  * @devdata: Visornic device to drain.
1639  * @rx_work_done: Incremented once for each receive that is processed.
1640  * @budget:       Maximum number of receives to process (NAPI budget).
1641  *
1642  * Drain the response queue of any responses from the IO Partition. Process the
1643  * responses as we get them.
1644  */
1645 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1646                                struct visornic_devdata *devdata,
1647                                int *rx_work_done, int budget)
1648 {
1649         unsigned long flags;
1650         struct net_device *netdev;
1651
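         /* Pull responses off the IO channel until it is empty or the NAPI
          * budget is exhausted; each response type is handled below.
          */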
1652         while (*rx_work_done < budget) {
1653                 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1654                  * moment
1655                  */
1656                 /* queue empty */
1657                 if (visorchannel_signalremove(devdata->dev->visorchannel,
1658                                               IOCHAN_FROM_IOPART,
1659                                               cmdrsp))
1660                         break;
1661
1662                 switch (cmdrsp->net.type) {
1663                 case NET_RCV:
1664                         devdata->chstat.got_rcv++;
1665                         /* process incoming packet */
1666                         *rx_work_done += visornic_rx(cmdrsp);
1667                         break;
1668                 case NET_XMIT_DONE:
1669                         spin_lock_irqsave(&devdata->priv_lock, flags);
1670                         devdata->chstat.got_xmit_done++;
1671                         if (cmdrsp->net.xmtdone.xmt_done_result)
1672                                 devdata->chstat.xmit_fail++;
1673                         /* only call queue wake if we stopped it */
1674                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1675                         /* ASSERT netdev == vnicinfo->netdev; */
1676                         if (netdev == devdata->netdev &&
1677                             netif_queue_stopped(netdev)) {
1678                                 /* check if we have crossed the lower watermark
1679                                  * for netif_wake_queue()
1680                                  */
1681                                 if (vnic_hit_low_watermark
1682                                     (devdata,
1683                                      devdata->lower_threshold_net_xmits)) {
1684                                         /* enough NET_XMITs completed
1685                                          * so can restart netif queue
1686                                          */
1687                                         netif_wake_queue(netdev);
1688                                         devdata->flow_control_lower_hits++;
1689                                 }
1690                         }
1691                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1692                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1693                         kfree_skb(cmdrsp->net.buf);
1694                         break;
1695                 case NET_RCV_ENBDIS_ACK:
1696                         devdata->chstat.got_enbdisack++;
1697                         netdev = (struct net_device *)
1698                                  cmdrsp->net.enbdis.context;
1699                         spin_lock_irqsave(&devdata->priv_lock, flags);
1700                         devdata->enab_dis_acked = 1;
1701                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1702
1703                         if (devdata->server_down &&
1704                             devdata->server_change_state) {
1705                                 /* Inform Linux that the link is up */
1706                                 devdata->server_down = false;
1707                                 devdata->server_change_state = false;
1708                                 netif_wake_queue(netdev);
1709                                 netif_carrier_on(netdev);
1710                         }
1711                         break;
1712                 case NET_CONNECT_STATUS:
1713                         netdev = devdata->netdev;
1714                         if (cmdrsp->net.enbdis.enable == 1) {
1715                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1716                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1717                                 spin_unlock_irqrestore(&devdata->priv_lock,
1718                                                        flags);
1719                                 netif_wake_queue(netdev);
1720                                 netif_carrier_on(netdev);
1721                         } else {
1722                                 netif_stop_queue(netdev);
1723                                 netif_carrier_off(netdev);
1724                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1725                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1726                                 spin_unlock_irqrestore(&devdata->priv_lock,
1727                                                        flags);
1728                         }
1729                         break;
1730                 default:
1731                         break;
1732                 }
1733                 /* cmdrsp is now available for reuse  */
1734         }
1735 }
1736
1737 static int visornic_poll(struct napi_struct *napi, int budget)
1738 {
1739         struct visornic_devdata *devdata = container_of(napi,
1740                                                         struct visornic_devdata,
1741                                                         napi);
1742         int rx_count = 0;
1743
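         /* Repost any receive buffers we previously failed to allocate, then
          * drain responses from the IO partition up to the NAPI budget.
          */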
1744         send_rcv_posts_if_needed(devdata);
1745         service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1746
1747         /* If there aren't any more packets to receive, stop the poll */
1748         if (rx_count < budget)
1749                 napi_complete_done(napi, rx_count);
1750
1751         return rx_count;
1752 }
1753
1754 /* poll_for_irq - periodically check the status of the response queue
1755  * @t: Timer that fired; embedded in the visornic devdata struct.
1756  *
1757  * Timer callback used in place of a real interrupt. If the response queue
1758  * has work pending, schedule the NAPI poll routine to drain it.
1759  */
1760 static void poll_for_irq(struct timer_list *t)
1761 {
1762         struct visornic_devdata *devdata = from_timer(devdata, t,
1763                                                       irq_poll_timer);
1764
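         /* If the IO partition has queued responses, let NAPI do the real
          * work; the queue is drained in visornic_poll().
          */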
1765         if (!visorchannel_signalempty(
1766                                    devdata->dev->visorchannel,
1767                                    IOCHAN_FROM_IOPART))
1768                 napi_schedule(&devdata->napi);
1769
1770         atomic_set(&devdata->interrupt_rcvd, 0);
1771
1772         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1773 }
1774
1775 /* visornic_probe - probe function for visornic devices
1776  * @dev: The visor device discovered.
1777  *
1778  * Called when visorbus discovers a visornic device on its bus. It creates a new
1779  * visornic ethernet adapter.
1780  *
1781  * Return: 0 on success, or negative integer on error.
1782  */
1783 static int visornic_probe(struct visor_device *dev)
1784 {
1785         struct visornic_devdata *devdata = NULL;
1786         struct net_device *netdev = NULL;
1787         int err;
1788         int channel_offset = 0;
1789         u64 features;
1790
1791         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1792         if (!netdev) {
1793                 dev_err(&dev->device,
1794                         "%s alloc_etherdev failed\n", __func__);
1795                 return -ENOMEM;
1796         }
1797
1798         netdev->netdev_ops = &visornic_dev_ops;
1799         netdev->watchdog_timeo = 5 * HZ;
1800         SET_NETDEV_DEV(netdev, &dev->device);
1801
1802         /* Get MAC address from channel and read it into the device. */
1803         netdev->addr_len = ETH_ALEN;
1804         channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1805         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1806                                     ETH_ALEN);
1807         if (err < 0) {
1808                 dev_err(&dev->device,
1809                         "%s failed to get mac addr from chan (%d)\n",
1810                         __func__, err);
1811                 goto cleanup_netdev;
1812         }
1813
1814         devdata = devdata_initialize(netdev_priv(netdev), dev);
1815         if (!devdata) {
1816                 dev_err(&dev->device,
1817                         "%s devdata_initialize failed\n", __func__);
1818                 err = -ENOMEM;
1819                 goto cleanup_netdev;
1820         }
1821         /* don't trust messages laying around in the channel */
1822         drain_resp_queue(devdata->cmdrsp, devdata);
1823
1824         devdata->netdev = netdev;
1825         dev_set_drvdata(&dev->device, devdata);
1826         init_waitqueue_head(&devdata->rsp_queue);
1827         spin_lock_init(&devdata->priv_lock);
1828         /* not enabled to receive yet */
1829         devdata->enabled = 0;
1830         atomic_set(&devdata->usage, 1);
1831
1832         /* Setup rcv bufs */
1833         channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1834         err = visorbus_read_channel(dev, channel_offset,
1835                                     &devdata->num_rcv_bufs, 4);
1836         if (err) {
1837                 dev_err(&dev->device,
1838                         "%s failed to get #rcv bufs from chan (%d)\n",
1839                         __func__, err);
1840                 goto cleanup_netdev;
1841         }
1842
1843         devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1844                                   sizeof(struct sk_buff *), GFP_KERNEL);
1845         if (!devdata->rcvbuf) {
1846                 err = -ENOMEM;
1847                 goto cleanup_netdev;
1848         }
1849
1850         /* Set the net_xmit outstanding thresholds.
1851          * Always leave two slots open, but never go below 3;
1852          * max_outstanding_net_xmits must be > 0.
1853          */
1854         devdata->max_outstanding_net_xmits =
1855                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1856         devdata->upper_threshold_net_xmits =
1857                 max_t(unsigned long,
1858                       2, (devdata->max_outstanding_net_xmits - 1));
1859         devdata->lower_threshold_net_xmits =
1860                 max_t(unsigned long,
1861                       1, (devdata->max_outstanding_net_xmits / 2));
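         /* Illustrative example only: if the channel were to report 64 receive
          * buffers, this yields max_outstanding_net_xmits = 64/3 - 2 = 19, an
          * upper threshold of 18 and a lower threshold of 9.
          */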
1862
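         /* xmitbufhead tracks skbs that have been handed to the IO partition
          * and are still awaiting a NET_XMIT_DONE response.
          */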
1863         skb_queue_head_init(&devdata->xmitbufhead);
1864
1865         /* create a cmdrsp we can use to post and unpost rcv buffers */
1866         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1867         if (!devdata->cmdrsp_rcv) {
1868                 err = -ENOMEM;
1869                 goto cleanup_rcvbuf;
1870         }
1871         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1872         if (!devdata->xmit_cmdrsp) {
1873                 err = -ENOMEM;
1874                 goto cleanup_cmdrsp_rcv;
1875         }
1876         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1877         devdata->server_down = false;
1878         devdata->server_change_state = false;
1879
1880         /* set the default mtu */
1881         channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1882         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1883         if (err) {
1884                 dev_err(&dev->device,
1885                         "%s failed to get mtu from chan (%d)\n",
1886                         __func__, err);
1887                 goto cleanup_xmit_cmdrsp;
1888         }
1889
1890         /* TODO: Setup Interrupt information */
1891         /* Set up NAPI and the polling timer used to collect responses */
1892         netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1893
1894         timer_setup(&devdata->irq_poll_timer, poll_for_irq, 0);
1895         /* Note: This timer has to be running before the interface can be
1896          * opened, because the napi poll routine is responsible for setting
1897          * enab_dis_acked.
1898          */
1899         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1900
1901         channel_offset = offsetof(struct visor_io_channel,
1902                                   channel_header.features);
1903         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1904         if (err) {
1905                 dev_err(&dev->device,
1906                         "%s failed to get features from chan (%d)\n",
1907                         __func__, err);
1908                 goto cleanup_napi_add;
1909         }
1910
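         /* Advertise to the IO partition that this driver polls the channel
          * (VISOR_CHANNEL_IS_POLLING) and supports enhanced receive-buffer
          * checking.
          */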
1911         features |= VISOR_CHANNEL_IS_POLLING;
1912         features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1913         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1914         if (err) {
1915                 dev_err(&dev->device,
1916                         "%s failed to set features in chan (%d)\n",
1917                         __func__, err);
1918                 goto cleanup_napi_add;
1919         }
1920
1921         /* Note: Channel interrupts have to be enabled before the device
1922          * can be opened, because the napi poll routine is responsible for
1923          * setting enab_dis_acked.
1924          */
1925         visorbus_enable_channel_interrupts(dev);
1926
1927         err = register_netdev(netdev);
1928         if (err) {
1929                 dev_err(&dev->device,
1930                         "%s register_netdev failed (%d)\n", __func__, err);
1931                 goto cleanup_napi_add;
1932         }
1933
1934         /* create debug/sysfs directories */
1935         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1936                                                       visornic_debugfs_dir);
1937         if (!devdata->eth_debugfs_dir) {
1938                 dev_err(&dev->device,
1939                         "%s debugfs_create_dir %s failed\n",
1940                         __func__, netdev->name);
1941                 err = -ENOMEM;
1942                 goto cleanup_register_netdev;
1943         }
1944
1945         dev_info(&dev->device, "%s success netdev=%s\n",
1946                  __func__, netdev->name);
1947         return 0;
1948
1949 cleanup_register_netdev:
1950         unregister_netdev(netdev);
1951
1952 cleanup_napi_add:
1953         del_timer_sync(&devdata->irq_poll_timer);
1954         netif_napi_del(&devdata->napi);
1955
1956 cleanup_xmit_cmdrsp:
1957         kfree(devdata->xmit_cmdrsp);
1958
1959 cleanup_cmdrsp_rcv:
1960         kfree(devdata->cmdrsp_rcv);
1961
1962 cleanup_rcvbuf:
1963         kfree(devdata->rcvbuf);
1964
1965 cleanup_netdev:
1966         free_netdev(netdev);
1967         return err;
1968 }
1969
1970 /* host_side_disappeared - IO Partition is gone
1971  * @devdata: Device object.
1972  *
1973  * IO partition servicing this device is gone; do cleanup.
1974  */
1975 static void host_side_disappeared(struct visornic_devdata *devdata)
1976 {
1977         unsigned long flags;
1978
1979         spin_lock_irqsave(&devdata->priv_lock, flags);
1980         /* indicate device destroyed */
1981         devdata->dev = NULL;
1982         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1983 }
1984
1985 /* visornic_remove - called when visornic dev goes away
1986  * @dev: Visornic device that is being removed.
1987  *
1988  * Called when DEVICE_DESTROY gets called to remove device.
1989  */
1990 static void visornic_remove(struct visor_device *dev)
1991 {
1992         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1993         struct net_device *netdev;
1994         unsigned long flags;
1995
1996         if (!devdata) {
1997                 dev_err(&dev->device, "%s no devdata\n", __func__);
1998                 return;
1999         }
2000         spin_lock_irqsave(&devdata->priv_lock, flags);
2001         if (devdata->going_away) {
2002                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2003                 dev_err(&dev->device, "%s already being removed\n", __func__);
2004                 return;
2005         }
2006         devdata->going_away = true;
2007         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2008         netdev = devdata->netdev;
2009         if (!netdev) {
2010                 dev_err(&dev->device, "%s no net device\n", __func__);
2011                 return;
2012         }
2013
2014         /* going_away prevents new items being added to the workqueues */
2015         cancel_work_sync(&devdata->timeout_reset);
2016
2017         debugfs_remove_recursive(devdata->eth_debugfs_dir);
2018         /* this will call visornic_close() */
2019         unregister_netdev(netdev);
2020
2021         del_timer_sync(&devdata->irq_poll_timer);
2022         netif_napi_del(&devdata->napi);
2023
2024         dev_set_drvdata(&dev->device, NULL);
2025         host_side_disappeared(devdata);
2026         devdata_release(devdata);
2027         free_netdev(netdev);
2028 }
2029
2030 /* visornic_pause - called when IO Part disappears
2031  * @dev:           Visornic device that is being serviced.
2032  * @complete_func: Call when finished.
2033  *
2034  * Called when the IO Partition has gone down. Need to free up resources and
2035  * wait for IO partition to come back. Mark link as down and don't attempt any
2036  * DMA. When we have freed memory, call the complete_func so that Command knows
2037  * we are done. If we don't call complete_func, the IO Partition will never
2038  * come back.
2039  *
2040  * Return: 0 on success.
2041  */
2042 static int visornic_pause(struct visor_device *dev,
2043                           visorbus_state_complete_func complete_func)
2044 {
2045         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2046
2047         visornic_serverdown(devdata, complete_func);
2048         return 0;
2049 }
2050
2051 /* visornic_resume - called when IO Partition has recovered
2052  * @dev:           Visornic device that is being serviced.
2053  * @complete_func: Call when finished.
2054  *
2055  * Called when the IO partition has recovered. Re-establish connection to the IO
2056  * Partition and set the link up. Okay to do DMA again.
2057  *
2058  * Return: 0 on success, negative integer on error.
2059  */
2060 static int visornic_resume(struct visor_device *dev,
2061                            visorbus_state_complete_func complete_func)
2062 {
2063         struct visornic_devdata *devdata;
2064         struct net_device *netdev;
2065         unsigned long flags;
2066
2067         devdata = dev_get_drvdata(&dev->device);
2068         if (!devdata) {
2069                 dev_err(&dev->device, "%s no devdata\n", __func__);
2070                 return -EINVAL;
2071         }
2072
2073         netdev = devdata->netdev;
2074
2075         spin_lock_irqsave(&devdata->priv_lock, flags);
2076         if (devdata->server_change_state) {
2077                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2078                 dev_err(&dev->device, "%s server already changing state\n",
2079                         __func__);
2080                 return -EINVAL;
2081         }
2082         if (!devdata->server_down) {
2083                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2084                 dev_err(&dev->device, "%s server not down\n", __func__);
2085                 complete_func(dev, 0);
2086                 return 0;
2087         }
2088         devdata->server_change_state = true;
2089         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2090
2091         /* Must transition channel to ATTACHED state BEFORE
2092          * we can start using the device again.
2093          * TODO: State transitions
2094          */
2095         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2096
2097         rtnl_lock();
2098         dev_open(netdev);
2099         rtnl_unlock();
2100
2101         complete_func(dev, 0);
2102         return 0;
2103 }
2104
2105 /* This is used to tell the visorbus driver which types of visor devices
2106  * we support, and what functions to call when a visor device that we support
2107  * is attached or removed.
2108  */
2109 static struct visor_driver visornic_driver = {
2110         .name = "visornic",
2111         .owner = THIS_MODULE,
2112         .channel_types = visornic_channel_types,
2113         .probe = visornic_probe,
2114         .remove = visornic_remove,
2115         .pause = visornic_pause,
2116         .resume = visornic_resume,
2117         .channel_interrupt = NULL,
2118 };
2119
2120 /* visornic_init - init function
2121  *
2122  * Init function for the visornic driver. Do initial driver setup and wait
2123  * for devices.
2124  *
2125  * Return: 0 on success, negative integer on error.
2126  */
2127 static int visornic_init(void)
2128 {
2129         int err;
2130
2131         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2132
2133         debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2134                             &debugfs_info_fops);
2135         debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
2136                             &debugfs_enable_ints_fops);
2137
2138         err = visorbus_register_visor_driver(&visornic_driver);
2139         if (err)
2140                 debugfs_remove_recursive(visornic_debugfs_dir);
2141
2142         return err;
2143 }
2144
2145 /* visornic_cleanup - driver exit routine
2146  *
2147  * Unregister driver from the bus and free up memory.
2148  */
2149 static void visornic_cleanup(void)
2150 {
2151         visorbus_unregister_visor_driver(&visornic_driver);
2152         debugfs_remove_recursive(visornic_debugfs_dir);
2153 }
2154
2155 module_init(visornic_init);
2156 module_exit(visornic_cleanup);
2157
2158 MODULE_AUTHOR("Unisys");
2159 MODULE_LICENSE("GPL");
2160 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");