1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
6 /* This driver lives in a spar partition, and registers to ethernet io
7 * channels from the visorbus driver. It creates netdev devices and
8 * forwards transmit to the IO channel and accepts rcvs from the IO
9 * Partition via the IO channel.
12 #include <linux/debugfs.h>
13 #include <linux/etherdevice.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/kthread.h>
17 #include <linux/skbuff.h>
18 #include <linux/rtnetlink.h>
19 #include <linux/visorbus.h>
21 #include "iochannel.h"
// Timeout value meaning "wait forever": the enable/disable ACK loops keep
// waiting while timeout == VISORNIC_INFINITE_RSP_WAIT.
23 #define VISORNIC_INFINITE_RSP_WAIT 0
25 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
// 64 * 32 * 80 = 163840
28 #define MAX_BUF 163840
29 #define NAPI_WEIGHT 64
31 /* GUIDS for director channel type supported by this driver. */
32 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
33 #define VISOR_VNIC_CHANNEL_GUID \
34 GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
35 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
// String form of the same GUID; used to build the MODULE_ALIAS string.
36 #define VISOR_VNIC_CHANNEL_GUID_STR \
37 "8cd5994d-c58e-11da-95a9-00e08161165f"
// Table of channel types handled by this driver; it is registered for the
// "visorbus" bus through MODULE_DEVICE_TABLE() below.
39 static struct visor_channeltype_descriptor visornic_channel_types[] = {
40 /* Note that the only channel type we expect to be reported by the
41 * bus driver is the VISOR_VNIC channel.
43 { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
44 VISOR_VNIC_CHANNEL_VERSIONID },
47 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
48 /* FIXME XXX: This next line of code must be fixed and removed before
49 * acceptance into the 'normal' part of the kernel. It is only here as a place
50 * holder to get module autoloading functionality working for visorbus. Code
51 * must be added to scripts/mod/file2alias.c, etc., to get this working
54 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
// Per-device channel statistics counters. These are accessed elsewhere in
// this file as devdata->chstat.<field>.
// NOTE(review): the line opening this struct is not present in this listing;
// presumably these are the members of struct chanstat - confirm.
57 unsigned long got_rcv;
58 unsigned long got_enbdisack;
// sent_xmit - got_xmit_done is the number of outstanding xmits (see
// devdata_xmits_outstanding()); both are zeroed on server-down.
59 unsigned long got_xmit_done;
60 unsigned long xmit_fail;
// Bumped by send_enbdis().
61 unsigned long sent_enbdis;
62 unsigned long sent_promisc;
// Bumped by post_skb().
63 unsigned long sent_post;
64 unsigned long sent_post_failed;
65 unsigned long sent_xmit;
// Bumped by visornic_xmit() each time max_outstanding_net_xmits is hit.
66 unsigned long reject_count;
67 unsigned long extra_rcvbufs_sent;
70 /* struct visornic_devdata
71 * @enabled: 0 disabled 1 enabled to receive.
72 * @enab_dis_acked: NET_RCV_ENABLE/DISABLE acked by IOPART.
79 * @incarnation_id: incarnation_id lets IOPART know about
81 * @old_flags: flags as they were prior to
83 * @usage: count of users.
84 * @num_rcv_bufs: number of rcv buffers the vnic will post.
85 * @num_rcv_bufs_could_not_alloc: bumped by repost_return() when a
 *                                 replacement rcv buffer cannot be allocated.
86 * @num_rcvbuf_in_iovm: bumped by post_skb(), dropped by visornic_rx(),
 *                       zeroed by visornic_serverdown_complete().
87 * @alloc_failed_in_if_needed_cnt:
88 * @alloc_failed_in_repost_rtn_cnt: bumped by repost_return() on alloc failure.
89 * @max_outstanding_net_xmits: absolute max number of outstanding xmits
90 * - should never hit this.
91 * @upper_threshold_net_xmits: high water mark for calling
93 * @lower_threshold_net_xmits: high water mark for calling
 *   NOTE(review): "high water mark" on the lower threshold looks copy-pasted
 *   from the entry above; presumably this is the low water mark - confirm.
95 * @struct xmitbufhead: xmitbufhead - head of the xmit buffer list
96 * sent to the IOPART end.
97 * @server_down_complete_func: callback invoked (with status 0) by
 *                              visornic_serverdown_complete(), then cleared.
98 * @struct timeout_reset: work item handled by visornic_timeout_reset() and
 *                         scheduled from visornic_xmit_timeout().
99 * @struct *cmdrsp_rcv: cmdrsp_rcv is used for posting/unposting rcv
101 * @struct *xmit_cmdrsp: xmit_cmdrsp - issues NET_XMIT - only one
102 * active xmit at a time.
103 * @server_down: IOPART is down.
104 * @server_change_state: Processing SERVER_CHANGESTATE msg.
105 * @going_away: device is being torn down.
106 * @struct *eth_debugfs_dir:
109 * @interrupts_disabled:
111 * @priv_lock: spinlock to access devdata structures.
112 * @flow_control_upper_hits: bumped by visornic_xmit() each time
 *                            upper_threshold_net_xmits is reached.
113 * @flow_control_lower_hits:
114 * @n_rcv0: # rcvs of 0 buffers.
115 * @n_rcv1: # rcvs of 1 buffers.
116 * @n_rcv2: # rcvs of 2 buffers.
117 * @n_rcvx: # rcvs of >2 buffers.
118 * @found_repost_rcvbuf_cnt: # repost_rcvbuf_cnt.
119 * @repost_found_skb_cnt: # of found the skb.
120 * @n_repost_deficit: # of lost rcv buffers.
121 * @bad_rcv_buf: # of unknown rcv skb not freed.
122 * @n_rcv_packets_not_accepted: # bogus rcv packets.
123 * @queuefullmsg_logged: set by visornic_xmit() while the max-outstanding
 *                        limit is being hit, cleared once an xmit gets past it.
125 * @struct irq_poll_timer:
// NOTE(review): several members used elsewhere in this file (usage,
// num_rcv_bufs, incarnation_id, server_down, going_away) have no declaration
// line in this listing; the listing appears to be missing lines.
129 struct visornic_devdata {
130 unsigned short enabled;
131 unsigned short enab_dis_acked;
133 struct visor_device *dev;
134 struct net_device *netdev;
135 struct net_device_stats net_stats;
136 atomic_t interrupt_rcvd;
137 wait_queue_head_t rsp_queue;
138 struct sk_buff **rcvbuf;
140 unsigned short old_flags;
144 int num_rcv_bufs_could_not_alloc;
145 atomic_t num_rcvbuf_in_iovm;
146 unsigned long alloc_failed_in_if_needed_cnt;
147 unsigned long alloc_failed_in_repost_rtn_cnt;
149 unsigned long max_outstanding_net_xmits;
150 unsigned long upper_threshold_net_xmits;
151 unsigned long lower_threshold_net_xmits;
152 struct sk_buff_head xmitbufhead;
154 visorbus_state_complete_func server_down_complete_func;
155 struct work_struct timeout_reset;
156 struct uiscmdrsp *cmdrsp_rcv;
157 struct uiscmdrsp *xmit_cmdrsp;
159 bool server_change_state;
161 struct dentry *eth_debugfs_dir;
163 u64 interrupts_notme;
164 u64 interrupts_disabled;
166 /* spinlock to access devdata structures. */
167 spinlock_t priv_lock;
169 /* flow control counter */
170 u64 flow_control_upper_hits;
171 u64 flow_control_lower_hits;
174 unsigned long n_rcv0;
175 unsigned long n_rcv1;
176 unsigned long n_rcv2;
177 unsigned long n_rcvx;
178 unsigned long found_repost_rcvbuf_cnt;
179 unsigned long repost_found_skb_cnt;
180 unsigned long n_repost_deficit;
181 unsigned long bad_rcv_buf;
182 unsigned long n_rcv_packets_not_accepted;
184 int queuefullmsg_logged;
185 struct chanstat chstat;
186 struct timer_list irq_poll_timer;
187 struct napi_struct napi;
188 struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
// add_physinfo_entries - describe one (pfn, offset, length) region as one or
// more per-page phys_info entries, written to pi_arr[] starting at @index.
// @inp_pfn: page frame number of the first page of the region.
// @inp_off: byte offset of the region within that first page.
// @inp_len: length of the region in bytes.
// @index: first pi_arr[] slot to fill.
// @max_pi_arr_entries: capacity of pi_arr[]; slots at or beyond it are
//                      never written.
// @pi_arr: output array.
191 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
192 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
193 u16 index, u16 max_pi_arr_entries,
194 struct phys_info pi_arr[])
196 u16 i, len, firstlen;
// Bytes available in the first page from inp_off to the end of that page.
198 firstlen = PI_PAGE_SIZE - inp_off;
199 if (inp_len <= firstlen) {
200 /* The input entry spans only one page - add as is. */
201 if (index >= max_pi_arr_entries)
203 pi_arr[index].pi_pfn = inp_pfn;
204 pi_arr[index].pi_off = (u16)inp_off;
205 pi_arr[index].pi_len = (u16)inp_len;
209 /* This entry spans multiple pages. */
// One entry per page: page i of the region uses pfn inp_pfn + i. The loop
// consumes len by the pi_len just stored and stops once nothing is left.
210 for (len = inp_len, i = 0; len;
211 len -= pi_arr[index + i].pi_len, i++) {
212 if (index + i >= max_pi_arr_entries)
214 pi_arr[index + i].pi_pfn = inp_pfn + i;
// First page: starts at inp_off and runs to the end of the page.
216 pi_arr[index].pi_off = inp_off;
217 pi_arr[index].pi_len = firstlen;
// Following pages: start at offset 0.
219 pi_arr[index + i].pi_off = 0;
220 pi_arr[index + i].pi_len = min_t(u16, len,
227 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
228 * array that the IOPART understands
229 * @skb: Skbuff that we are pulling the frags from.
230 * @firstfraglen: Length of first fragment in skb.
231 * @frags_max: Max len of frags array.
232 * @frags: Frags array filled in on output.
 *
 * The linear part of the skb (skb->data, @firstfraglen bytes) is described
 * first, one entry per page touched; the page fragments in
 * skb_shinfo(skb)->frags follow via add_physinfo_entries(); finally every skb
 * on the frag_list is handled by a recursive call. The skb may be linearized
 * if the estimated entry count exceeds @frags_max.
234 * Return: Positive integer indicating number of entries filled in frags on
235 * success, negative integer on error.
237 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
238 unsigned int firstfraglen,
239 unsigned int frags_max,
240 struct phys_info frags[])
242 unsigned int count = 0, frag, size, offset = 0, numfrags;
243 unsigned int total_count;
245 numfrags = skb_shinfo(skb)->nr_frags;
247 /* Compute the number of fragments this skb has, and if its more than
248 * frag array can hold, linearize the skb
// Estimate: page frags plus the number of PI_PAGE_SIZE pages needed for the
// linear part (whole pages, plus one when there is a remainder).
250 total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
251 if (firstfraglen % PI_PAGE_SIZE)
254 if (total_count > frags_max) {
255 if (skb_linearize(skb))
// Re-read after linearizing: the fragment count may have changed.
257 numfrags = skb_shinfo(skb)->nr_frags;
// Walk the linear data page by page.
261 while (firstfraglen) {
262 if (count == frags_max)
265 frags[count].pi_pfn =
266 page_to_pfn(virt_to_page(skb->data + offset));
267 frags[count].pi_off =
268 (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
269 size = min_t(unsigned int, firstfraglen,
270 PI_PAGE_SIZE - frags[count].pi_off);
272 /* can take smallest of firstfraglen (what's left) OR
273 * bytes left in the page
275 frags[count].pi_len = size;
276 firstfraglen -= size;
// Require at least one free slot per page fragment before starting on them.
281 if ((count + numfrags) > frags_max)
284 for (frag = 0; frag < numfrags; frag++) {
285 count = add_physinfo_entries(page_to_pfn(
286 skb_frag_page(&skb_shinfo(skb)->frags[frag])),
287 skb_shinfo(skb)->frags[frag].page_offset,
288 skb_shinfo(skb)->frags[frag].size, count,
290 /* add_physinfo_entries only returns
291 * zero if the frags array is out of room
292 * That should never happen because we
293 * fail above, if count+numfrags > frags_max.
// NOTE(review): the check above reserves one slot per fragment, while
// add_physinfo_entries() can use several slots for a fragment that spans
// pages - confirm the "never happen" claim.
299 if (skb_shinfo(skb)->frag_list) {
300 struct sk_buff *skbinlist;
// Recurse into each skb chained on the frag_list.
303 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
304 skbinlist = skbinlist->next) {
305 c = visor_copy_fragsinfo_from_skb(skbinlist,
// enable_ints_write - write handler wired into debugfs_enable_ints_fops.
// Per the comment below it is kept only so the debugfs file keeps existing
// and stays writable.
318 static ssize_t enable_ints_write(struct file *file,
319 const char __user *buffer,
320 size_t count, loff_t *ppos)
322 /* Don't want to break ABI here by having a debugfs
323 * file that no longer exists or is writable, so
324 * lets just make this a vestigial function
// File operations for the "enable ints" debugfs entry; only .write is set.
329 static const struct file_operations debugfs_enable_ints_fops = {
330 .write = enable_ints_write,
333 /* visornic_serverdown_complete - pause device following IOPART going down
334 * @devdata: Device managed by IOPART.
336 * The IO partition has gone down, and we need to do some cleanup for when it
337 * comes back. Treat the IO partition as the link being down.
 *
 * Stops the interrupt poll timer, resets the rcv-buffer and xmit accounting,
 * reports completion through server_down_complete_func (if set) and marks
 * the server as down.
339 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
341 struct net_device *netdev = devdata->netdev;
343 /* Stop polling for interrupts */
344 del_timer_sync(&devdata->irq_poll_timer);
// Reset accounting: no rcv buffers are held by the IOVM any more, and with
// both counters at 0 devdata_xmits_outstanding() reports 0.
350 atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
351 devdata->chstat.sent_xmit = 0;
352 devdata->chstat.got_xmit_done = 0;
// Report completion (status 0) to whoever requested the server-down.
354 if (devdata->server_down_complete_func)
355 (*devdata->server_down_complete_func)(devdata->dev, 0);
357 devdata->server_down = true;
358 devdata->server_change_state = false;
// Callback is one-shot; clear it.
359 devdata->server_down_complete_func = NULL;
362 /* visornic_serverdown - Command has notified us that IOPART is down
363 * @devdata: Device managed by IOPART.
364 * @complete_func: Function to call when finished.
366 * Schedule the work needed to handle the server down request. Make sure we
367 * haven't already handled the server change state event.
 *
 * Under priv_lock the request is refused if a state change is already in
 * progress, the server is already down, or the device is going away.
 * Otherwise server_change_state is set, @complete_func is recorded, the lock
 * is dropped and visornic_serverdown_complete() is called directly.
369 * Return: 0 if we scheduled the work, negative integer on error.
371 static int visornic_serverdown(struct visornic_devdata *devdata,
372 visorbus_state_complete_func complete_func)
377 spin_lock_irqsave(&devdata->priv_lock, flags);
378 if (devdata->server_change_state) {
379 dev_dbg(&devdata->dev->device, "%s changing state\n",
384 if (devdata->server_down) {
385 dev_dbg(&devdata->dev->device, "%s already down\n",
390 if (devdata->going_away) {
391 dev_dbg(&devdata->dev->device,
392 "%s aborting because device removal pending\n",
// Mark the transition in progress before releasing the lock so concurrent
// callers hit the server_change_state check above.
397 devdata->server_change_state = true;
398 devdata->server_down_complete_func = complete_func;
399 spin_unlock_irqrestore(&devdata->priv_lock, flags);
401 visornic_serverdown_complete(devdata);
// Unlock for the refused-request paths above.
405 spin_unlock_irqrestore(&devdata->priv_lock, flags);
409 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
410 * @netdev: Network adapter the rcv bufs are attached to.
412 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
413 * so that it can write rcv data into our memory space.
 *
 * The buffer is allocated with GFP_ATOMIC and its len is preset to
 * RCVPOST_BUF_SIZE.
415 * Return: Pointer to sk_buff.
417 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
421 /* NOTE: the first fragment in each rcv buffer is pointed to by
422 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
423 * in length, so the first frag is large enough to hold 1514.
// Callers in this file (init_rcv_bufs(), repost_return()) treat a NULL
// result as allocation failure.
425 skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
429 /* current value of mtu doesn't come into play here; large
430 * packets will just end up using multiple rcv buffers all of
// len is set by hand here; post_skb() reports it to the IO Partition as
// the fragment length.
433 skb->len = RCVPOST_BUF_SIZE;
434 /* alloc_skb already zeroes it out for clarification. */
439 /* post_skb - post a skb to the IO Partition
440 * @cmdrsp: Cmdrsp packet to be sent to the IO Partition.
441 * @devdata: visornic_devdata to post the skb to.
442 * @skb: Skb to give to the IO partition.
 *
 * Fills @cmdrsp with a NET_RCV_POST describing @skb's data area as a single
 * phys_info fragment and inserts it into the device's channel.
444 * Return: 0 on success, negative integer on error.
446 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
// The skb pointer travels with the command; visornic_rx() reads it back
// from cmdrsp->net.buf.
451 cmdrsp->net.buf = skb;
452 cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
453 cmdrsp->net.rcvpost.frag.pi_off =
454 (unsigned long)skb->data & PI_PAGE_MASK;
455 cmdrsp->net.rcvpost.frag.pi_len = skb->len;
456 cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
// A single fragment is described, so the buffer must not extend past the
// end of its page.
458 if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
461 cmdrsp->net.type = NET_RCV_POST;
462 cmdrsp->cmdtype = CMD_NET_TYPE;
463 err = visorchannel_signalinsert(devdata->dev->visorchannel,
467 devdata->chstat.sent_post_failed++;
// Account for one more rcv buffer held by the IOVM.
471 atomic_inc(&devdata->num_rcvbuf_in_iovm);
472 devdata->chstat.sent_post++;
476 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
477 * @netdev: Netdevice we are enabling/disabling, used as context return value.
478 * @state: Enable = 1/disable = 0.
479 * @devdata: Visornic device we are enabling/disabling.
481 * Send the enable/disable message to the IO Partition.
 *
 * The message is built in devdata->cmdrsp_rcv, the same cmdrsp used by
 * init_rcv_bufs() to post receive buffers.
483 * Return: 0 on success, negative integer on error.
485 static int send_enbdis(struct net_device *netdev, int state,
486 struct visornic_devdata *devdata)
490 devdata->cmdrsp_rcv->net.enbdis.enable = state;
491 devdata->cmdrsp_rcv->net.enbdis.context = netdev;
492 devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
493 devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
494 err = visorchannel_signalinsert(devdata->dev->visorchannel,
496 devdata->cmdrsp_rcv);
499 devdata->chstat.sent_enbdis++;
503 /* visornic_disable_with_timeout - disable network adapter
504 * @netdev: netdevice to disable.
505 * @timeout: Timeout to wait for disable.
 *           VISORNIC_INFINITE_RSP_WAIT keeps the wait loop going until an
 *           ACK arrives or the server goes away.
507 * Disable the network adapter and inform the IO Partition that we are disabled.
508 * Reclaim memory from rcv bufs.
 *
 * Sequence: clear enabled/enab_dis_acked under priv_lock, send the disable,
 * poll in 10 ms sleeps for the ACK, wait for other users to drop, then stop
 * the tx queue, disable NAPI, purge the xmit list and free all rcv buffers.
510 * Return: 0 on success, negative integer on failure of IO Partition responding.
512 static int visornic_disable_with_timeout(struct net_device *netdev,
515 struct visornic_devdata *devdata = netdev_priv(netdev);
521 /* send a msg telling the other end we are stopping incoming pkts */
522 spin_lock_irqsave(&devdata->priv_lock, flags);
523 devdata->enabled = 0;
524 /* must wait for ack */
525 devdata->enab_dis_acked = 0;
526 spin_unlock_irqrestore(&devdata->priv_lock, flags);
528 /* send disable and wait for ack -- don't hold lock when sending
529 * disable because if the queue is full, insert might sleep.
530 * If an error occurs, don't wait for the timeout.
532 err = send_enbdis(netdev, 0, devdata);
536 /* wait for ack to arrive before we try to free rcv buffers
537 * NOTE: the other end automatically unposts the rcv buffers
538 * when it gets a disable.
540 spin_lock_irqsave(&devdata->priv_lock, flags);
541 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
543 if (devdata->enab_dis_acked)
545 if (devdata->server_down || devdata->server_change_state) {
546 dev_dbg(&netdev->dev, "%s server went away\n",
// Sleep ~10 ms with the lock dropped, then re-check under the lock.
550 set_current_state(TASK_INTERRUPTIBLE);
551 spin_unlock_irqrestore(&devdata->priv_lock, flags);
552 wait += schedule_timeout(msecs_to_jiffies(10));
553 spin_lock_irqsave(&devdata->priv_lock, flags);
556 /* Wait for usage to go to 1 (no other users) before freeing
559 if (atomic_read(&devdata->usage) > 1) {
561 set_current_state(TASK_INTERRUPTIBLE);
562 spin_unlock_irqrestore(&devdata->priv_lock, flags);
563 schedule_timeout(msecs_to_jiffies(10));
564 spin_lock_irqsave(&devdata->priv_lock, flags);
// NOTE(review): this re-check tests for any non-zero usage, while the guard
// above and the comment talk about usage dropping to 1 - confirm intent.
565 if (atomic_read(&devdata->usage))
569 /* we've set enabled to 0, so we can give up the lock. */
570 spin_unlock_irqrestore(&devdata->priv_lock, flags);
572 /* stop the transmit queue so nothing more can be transmitted */
573 netif_stop_queue(netdev);
575 napi_disable(&devdata->napi);
// Drop every skb still tracked as sent-for-xmit.
577 skb_queue_purge(&devdata->xmitbufhead);
579 /* Free rcv buffers - other end has automatically unposted them on
582 for (i = 0; i < devdata->num_rcv_bufs; i++) {
583 if (devdata->rcvbuf[i]) {
584 kfree_skb(devdata->rcvbuf[i]);
585 devdata->rcvbuf[i] = NULL;
592 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
593 * @netdev: struct netdevice.
594 * @devdata: visornic_devdata.
596 * Allocate rcv buffers and post them to the IO Partition.
 *
 * Up to devdata->num_rcv_bufs buffers are allocated into devdata->rcvbuf[];
 * allocation stops at the first failure. Fewer than 2/3 of the requested
 * number is treated as failure and everything allocated is freed. The
 * buffers are then posted one by one with post_skb().
598 * Return: 0 on success, negative integer on failure.
600 static int init_rcv_bufs(struct net_device *netdev,
601 struct visornic_devdata *devdata)
603 int i, j, count, err;
605 /* allocate fixed number of receive buffers to post to uisnic
606 * post receive buffers after we've allocated a required amount
608 for (i = 0; i < devdata->num_rcv_bufs; i++) {
609 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
610 /* if we failed to allocate one let us stop */
611 if (!devdata->rcvbuf[i])
614 /* couldn't even allocate one -- bail out */
619 /* Ensure we can alloc 2/3rd of the requested number of buffers.
620 * 2/3 is an arbitrary choice; used also in ndis init.c
622 if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
623 /* free receive buffers we did alloc and then bail out */
624 for (i = 0; i < count; i++) {
625 kfree_skb(devdata->rcvbuf[i]);
626 devdata->rcvbuf[i] = NULL;
631 /* post receive buffers to receive incoming input - without holding
632 * lock - we've not enabled nor started the queue so there shouldn't
633 * be any rcv or xmit activity
635 for (i = 0; i < count; i++) {
636 err = post_skb(devdata->cmdrsp_rcv, devdata,
642 * If we posted at least one skb, we should return success,
643 * but need to free the resources that we have not successfully
// Free the buffers from index i (the one whose post is being handled)
// through count - 1.
646 for (j = i; j < count; j++) {
647 kfree_skb(devdata->rcvbuf[j]);
648 devdata->rcvbuf[j] = NULL;
658 /* visornic_enable_with_timeout - send enable to IO Partition
659 * @netdev: struct net_device.
660 * @timeout: Time to wait for the ACK from the enable.
662 * Sends enable to IOVM and inits, and posts receive buffers to IOVM. Timeout is
663 * defined in msecs (timeout of 0 specifies infinite wait).
 *
 * Sequence: enable NAPI, allocate and post rcv buffers, set enabled and
 * clear enab_dis_acked under priv_lock, send the enable, poll in 10 ms
 * sleeps for the ACK, and start the tx queue once the ACK has been seen.
665 * Return: 0 on success, negative integer on failure.
667 static int visornic_enable_with_timeout(struct net_device *netdev,
671 struct visornic_devdata *devdata = netdev_priv(netdev);
675 napi_enable(&devdata->napi);
677 /* NOTE: the other end automatically unposts the rcv buffers when it
680 err = init_rcv_bufs(netdev, devdata);
682 dev_err(&netdev->dev,
683 "%s failed to init rcv bufs\n", __func__);
687 spin_lock_irqsave(&devdata->priv_lock, flags);
688 devdata->enabled = 1;
689 devdata->enab_dis_acked = 0;
691 /* now we're ready, let's send an ENB to uisnic but until we get
692 * an ACK back from uisnic, we'll drop the packets
694 devdata->n_rcv_packets_not_accepted = 0;
695 spin_unlock_irqrestore(&devdata->priv_lock, flags);
697 /* send enable and wait for ack -- don't hold lock when sending enable
698 * because if the queue is full, insert might sleep. If an error
701 err = send_enbdis(netdev, 1, devdata);
705 spin_lock_irqsave(&devdata->priv_lock, flags);
706 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
708 if (devdata->enab_dis_acked)
710 if (devdata->server_down || devdata->server_change_state) {
711 dev_dbg(&netdev->dev, "%s server went away\n",
// Sleep ~10 ms with the lock dropped, then re-check under the lock.
715 set_current_state(TASK_INTERRUPTIBLE);
716 spin_unlock_irqrestore(&devdata->priv_lock, flags);
717 wait += schedule_timeout(msecs_to_jiffies(10));
718 spin_lock_irqsave(&devdata->priv_lock, flags);
721 spin_unlock_irqrestore(&devdata->priv_lock, flags);
// Leaving the loop without an ACK (timeout or server gone) is an error.
723 if (!devdata->enab_dis_acked) {
724 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
728 netif_start_queue(netdev);
732 /* visornic_timeout_reset - handle xmit timeout resets
733 * @work: Work item that scheduled the work.
 *        It is the timeout_reset member embedded in struct visornic_devdata.
735 * Transmit timeouts are typically handled by resetting the device for our
736 * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
737 * respond, we will trigger a serverdown.
739 static void visornic_timeout_reset(struct work_struct *work)
741 struct visornic_devdata *devdata;
742 struct net_device *netdev;
// Recover the owning devdata from the embedded work item.
745 devdata = container_of(work, struct visornic_devdata, timeout_reset);
746 netdev = devdata->netdev;
749 if (!netif_running(netdev)) {
754 response = visornic_disable_with_timeout(netdev,
755 VISORNIC_INFINITE_RSP_WAIT);
757 goto call_serverdown;
759 response = visornic_enable_with_timeout(netdev,
760 VISORNIC_INFINITE_RSP_WAIT);
762 goto call_serverdown;
// call_serverdown path: no completion callback is supplied.
769 visornic_serverdown(devdata, NULL);
773 /* visornic_open - enable the visornic device and mark the queue started
774 * @netdev: netdevice to start.
776 * Enable the device and start the transmit queue.
778 * Return: 0 on success.
780 static int visornic_open(struct net_device *netdev)
// NOTE(review): the result of visornic_enable_with_timeout() is not used on
// this line; the function's return statement is not present in this listing,
// so whether enable failures reach the caller cannot be told from here.
782 visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
786 /* visornic_close - disables the visornic device and stops the queues
787 * @netdev: netdevice to stop.
789 * Disable the device and stop the transmit queue.
791 * Return 0 on success.
793 static int visornic_close(struct net_device *netdev)
// NOTE(review): the result of visornic_disable_with_timeout() is not used on
// this line; the return statement is not present in this listing.
795 visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
799 /* devdata_xmits_outstanding - compute outstanding xmits
800 * @devdata: visornic_devdata for device
 *
 * Outstanding = chstat.sent_xmit - chstat.got_xmit_done, computed so that the
 * result stays correct after sent_xmit has wrapped past ULONG_MAX while
 * got_xmit_done has not yet.
802 * Return: Long integer representing the number of outstanding xmits.
804 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
806 if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
807 return devdata->chstat.sent_xmit -
808 devdata->chstat.got_xmit_done;
// sent_xmit has wrapped: distance from got_xmit_done up to ULONG_MAX, plus
// one for the wrap to 0, plus sent_xmit.
809 return (ULONG_MAX - devdata->chstat.got_xmit_done
810 + devdata->chstat.sent_xmit + 1);
813 /* vnic_hit_high_watermark
814 * @devdata: Indicates visornic device we are checking.
815 * @high_watermark: Max num of unacked xmits we will tolerate before we will
 *
 * visornic_xmit() calls this with max_outstanding_net_xmits (to reject the
 * packet) and with upper_threshold_net_xmits (to stop the queue).
818 * Return: True iff the number of unacked xmits sent to the IO Partition is >=
819 * high_watermark. False otherwise.
821 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
822 ulong high_watermark)
824 return (devdata_xmits_outstanding(devdata) >= high_watermark);
827 /* vnic_hit_low_watermark
828 * @devdata: Indicates visornic device we are checking.
829 * @low_watermark: We will wait until the num of unacked xmits drops to this
830 * value or lower before we start transmitting again.
 *
 * The count of unacked xmits comes from devdata_xmits_outstanding().
832 * Return: True iff the number of unacked xmits sent to the IO Partition is <=
835 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
838 return (devdata_xmits_outstanding(devdata) <= low_watermark);
841 /* visornic_xmit - send a packet to the IO Partition
842 * @skb: Packet to be sent.
843 * @netdev: Net device the packet is being sent from.
845 * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
846 * the XMIT command to the IO Partition for processing. This function is
847 * protected from concurrent calls by a spinlock xmit_lock in the net_device
848 * struct. As soon as the function returns, it can be called again.
 *
 * The whole body runs under devdata->priv_lock. The packet is refused when
 * the queue is stopped, the server is down or changing state, the linear part
 * is shorter than an Ethernet header, or max_outstanding_net_xmits unacked
 * xmits are already pending. On success the skb is tracked on xmitbufhead and
 * the queue is stopped once upper_threshold_net_xmits is reached.
850 * Return: NETDEV_TX_OK.
852 static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
854 struct visornic_devdata *devdata;
855 int len, firstfraglen, padlen;
856 struct uiscmdrsp *cmdrsp = NULL;
860 devdata = netdev_priv(netdev);
861 spin_lock_irqsave(&devdata->priv_lock, flags);
863 if (netif_queue_stopped(netdev) || devdata->server_down ||
864 devdata->server_change_state) {
865 spin_unlock_irqrestore(&devdata->priv_lock, flags);
867 dev_dbg(&netdev->dev,
868 "%s busy - queue stopped\n", __func__);
873 /* sk_buff struct is used to host network data throughout all the
874 * linux network subsystems
878 /* skb->len is the FULL length of data (including fragmentary portion)
879 * skb->data_len is the length of the fragment portion in frags
880 * skb->len - skb->data_len is size of the 1st fragment in skb->data
881 * calculate the length of the first fragment that skb->data is
884 firstfraglen = skb->len - skb->data_len;
// The Ethernet header is copied out of the linear part below, so the linear
// part must hold at least ETH_HLEN bytes.
885 if (firstfraglen < ETH_HLEN) {
886 spin_unlock_irqrestore(&devdata->priv_lock, flags);
888 dev_err(&netdev->dev,
889 "%s busy - first frag too small (%d)\n",
890 __func__, firstfraglen);
// Pad short packets in place, but only when the skb's linear buffer has
// room for ETH_MIN_PACKET_SIZE bytes.
895 if (len < ETH_MIN_PACKET_SIZE &&
896 ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
897 /* pad the packet out to minimum size */
898 padlen = ETH_MIN_PACKET_SIZE - len;
899 memset(&skb->data[len], 0, padlen);
903 firstfraglen += padlen;
// A single preallocated cmdrsp is reused for every xmit.
906 cmdrsp = devdata->xmit_cmdrsp;
908 memset(cmdrsp, 0, SIZEOF_CMDRSP);
909 cmdrsp->net.type = NET_XMIT;
910 cmdrsp->cmdtype = CMD_NET_TYPE;
912 /* save the pointer to skb -- we'll need it for completion */
913 cmdrsp->net.buf = skb;
915 if (vnic_hit_high_watermark(devdata,
916 devdata->max_outstanding_net_xmits)) {
917 /* extra NET_XMITs queued over to IOVM - need to wait */
918 devdata->chstat.reject_count++;
919 if (!devdata->queuefullmsg_logged &&
920 ((devdata->chstat.reject_count & 0x3ff) == 1))
921 devdata->queuefullmsg_logged = 1;
922 netif_stop_queue(netdev);
923 spin_unlock_irqrestore(&devdata->priv_lock, flags);
925 dev_dbg(&netdev->dev,
926 "%s busy - waiting for iovm to catch up\n",
931 if (devdata->queuefullmsg_logged)
932 devdata->queuefullmsg_logged = 0;
934 if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
935 cmdrsp->net.xmt.lincsum.valid = 1;
936 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
937 if (skb_transport_header(skb) > skb->data) {
938 cmdrsp->net.xmt.lincsum.hrawoff =
939 skb_transport_header(skb) - skb->data;
// Mark hrawoff as valid. Previously this line assigned 1 to hrawoff itself,
// discarding the offset stored just above; the network-header branch below
// sets the separate nhrawoffv flag, and this now mirrors it.
// NOTE(review): hrawoffv is assumed to exist alongside nhrawoffv in the
// lincsum struct - confirm against iochannel.h.
940 cmdrsp->net.xmt.lincsum.hrawoffv = 1;
942 if (skb_network_header(skb) > skb->data) {
943 cmdrsp->net.xmt.lincsum.nhrawoff =
944 skb_network_header(skb) - skb->data;
945 cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
947 cmdrsp->net.xmt.lincsum.csum = skb->csum;
949 cmdrsp->net.xmt.lincsum.valid = 0;
952 /* save off the length of the entire data packet */
953 cmdrsp->net.xmt.len = len;
955 /* copy ethernet header from first frag into cmdrsp
956 * - everything else will be pass in frags & DMA'ed
958 memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
960 /* copy frags info - from skb->data we need to only provide access
963 cmdrsp->net.xmt.num_frags =
964 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
966 cmdrsp->net.xmt.frags);
// NOTE(review): visor_copy_fragsinfo_from_skb() returns a negative int on
// error; this check only works if num_frags is a signed field - confirm.
967 if (cmdrsp->net.xmt.num_frags < 0) {
968 spin_unlock_irqrestore(&devdata->priv_lock, flags);
970 dev_err(&netdev->dev,
971 "%s busy - copy frags failed\n", __func__);
976 err = visorchannel_signalinsert(devdata->dev->visorchannel,
977 IOCHAN_TO_IOPART, cmdrsp);
979 netif_stop_queue(netdev);
980 spin_unlock_irqrestore(&devdata->priv_lock, flags);
982 dev_dbg(&netdev->dev,
983 "%s busy - signalinsert failed\n", __func__);
988 /* Track the skbs that have been sent to the IOVM for XMIT */
989 skb_queue_head(&devdata->xmitbufhead, skb);
991 /* update xmt stats */
992 devdata->net_stats.tx_packets++;
993 devdata->net_stats.tx_bytes += skb->len;
994 devdata->chstat.sent_xmit++;
996 /* check if we have hit the high watermark for netif_stop_queue() */
997 if (vnic_hit_high_watermark(devdata,
998 devdata->upper_threshold_net_xmits)) {
999 /* extra NET_XMITs queued over to IOVM - need to wait */
1000 /* stop queue - call netif_wake_queue() after lower threshold */
1001 netif_stop_queue(netdev);
1002 dev_dbg(&netdev->dev,
1003 "%s busy - invoking iovm flow control\n",
1005 devdata->flow_control_upper_hits++;
1007 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1009 /* skb will be freed when we get back NET_XMIT_DONE */
1010 return NETDEV_TX_OK;
1013 /* visornic_get_stats - returns net_stats of the visornic device
1014 * @netdev: netdevice.
 *
 * The stats live inside the device's private visornic_devdata; no copy is
 * made.
1016 * Return: Pointer to the net_device_stats struct for the device.
1018 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1020 struct visornic_devdata *devdata = netdev_priv(netdev);
1022 return &devdata->net_stats;
1025 /* visornic_change_mtu - changes mtu of device
1026 * @netdev: netdevice.
1027 * @new_mtu: Value of new mtu.
1029 * The device's MTU cannot be changed by system; it must be changed via a
1030 * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1031 * for everything to work. Currently not supported.
 *
 * NOTE(review): the function body is not present in this listing, so the
 * returned value cannot be documented from here.
1035 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1040 /* visornic_set_multi - set visornic device flags
1041 * @netdev: netdevice.
1043 * The only flag we currently support is IFF_PROMISC.
 *
 * Compares netdev->flags with the previously saved devdata->old_flags. Only a
 * change of the IFF_PROMISC bit leads to a NET_RCV_PROMISC message being
 * built and inserted into the channel; the current flags are then saved.
1045 static void visornic_set_multi(struct net_device *netdev)
1047 struct uiscmdrsp *cmdrsp;
1048 struct visornic_devdata *devdata = netdev_priv(netdev);
// Nothing changed at all.
1051 if (devdata->old_flags == netdev->flags)
// Something changed, but not the promiscuous bit: just remember the flags.
1054 if ((netdev->flags & IFF_PROMISC) ==
1055 (devdata->old_flags & IFF_PROMISC))
1056 goto out_save_flags;
// Temporary command buffer, allocated with GFP_ATOMIC.
// NOTE(review): the NULL check and the matching free are not present in this
// listing - confirm in the full source.
1058 cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1061 cmdrsp->cmdtype = CMD_NET_TYPE;
1062 cmdrsp->net.type = NET_RCV_PROMISC;
1063 cmdrsp->net.enbdis.context = netdev;
1064 cmdrsp->net.enbdis.enable =
1065 netdev->flags & IFF_PROMISC;
1066 err = visorchannel_signalinsert(devdata->dev->visorchannel,
1074 devdata->old_flags = netdev->flags;
1077 /* visornic_xmit_timeout - request to timeout the xmit
1078 * @netdev: netdevice.
1080 * Queue the work and return. Make sure we have not already been informed that
1081 * the IO Partition is gone; if so, we will have already timed-out the xmits.
 *
 * All checks run under priv_lock. Nothing is scheduled when the device is
 * going away, when it is not enabled, or when the server is down and no
 * state change is in progress. Otherwise the timeout_reset work item is
 * scheduled (handled by visornic_timeout_reset()).
1083 static void visornic_xmit_timeout(struct net_device *netdev)
1085 struct visornic_devdata *devdata = netdev_priv(netdev);
1086 unsigned long flags;
1088 spin_lock_irqsave(&devdata->priv_lock, flags);
1089 if (devdata->going_away) {
1090 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1091 dev_dbg(&devdata->dev->device,
1092 "%s aborting because device removal pending\n",
1097 /* Ensure that a ServerDown message hasn't been received */
1098 if (!devdata->enabled ||
1099 (devdata->server_down && !devdata->server_change_state)) {
1100 dev_dbg(&netdev->dev, "%s no processing\n",
1102 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1105 schedule_work(&devdata->timeout_reset);
1106 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1109 /* repost_return - repost rcv bufs that have come back
1110 * @cmdrsp: IO channel command struct to post.
1111 * @devdata: Visornic devdata for the device.
1112 * @skb: Socket buffer.
1113 * @netdev: netdevice.
1115 * Repost rcv buffers that have been returned to us when we are finished
 *
 * For every buffer listed in the returned net.rcv, the matching slot in
 * devdata->rcvbuf[] is located, a fresh buffer is allocated into that slot
 * and posted with post_skb() using @cmdrsp.
1118 * Return: 0 for success, negative integer on error.
1120 static int repost_return(struct uiscmdrsp *cmdrsp,
1121 struct visornic_devdata *devdata,
1122 struct sk_buff *skb, struct net_device *netdev)
1124 struct net_pkt_rcv copy;
1125 int i = 0, cc, numreposted;
// Work from a copy of net.rcv: @cmdrsp is handed to post_skb() below, which
// writes other net fields of the same command.
1129 copy = cmdrsp->net.rcv;
1130 switch (copy.numrcvbufs) {
// Outer loop: each returned buffer. Inner loop: find its rcvbuf[] slot.
1144 for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1145 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1146 if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1149 if ((skb) && devdata->rcvbuf[i] == skb) {
1150 devdata->found_repost_rcvbuf_cnt++;
1152 devdata->repost_found_skb_cnt++;
// Replace the returned buffer with a newly allocated one.
1154 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1155 if (!devdata->rcvbuf[i]) {
1156 devdata->num_rcv_bufs_could_not_alloc++;
1157 devdata->alloc_failed_in_repost_rtn_cnt++;
1161 status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1163 kfree_skb(devdata->rcvbuf[i]);
1164 devdata->rcvbuf[i] = NULL;
// Fewer reposts than returned buffers: record a deficit.
1171 if (numreposted != copy.numrcvbufs) {
1172 devdata->n_repost_deficit++;
1180 devdata->bad_rcv_buf++;
1186 /* visornic_rx - handle receive packets coming back from IO Partition
1187 * @cmdrsp: Receive packet returned from IO Partition.
1189 * Got a receive packet back from the IO Partition; handle it and send it up
1192 * Return: 1 iff an skb was received, otherwise 0.
// Overview: sanity-check the returned skb against the buffer count in the
// response, chain any additional receive buffers onto its frag_list, filter
// on packet type and pass accepted packets to netif_receive_skb().
1194 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1196 struct visornic_devdata *devdata;
1197 struct sk_buff *skb, *prev, *curr;
1198 struct net_device *netdev;
1199 int cc, currsize, off;
1201 unsigned long flags;
1203 /* post new rcv buf to the other end using the cmdrsp we have at hand
1204 * post it without holding lock - but we'll use the signal lock to
1205 * synchronize the queue insert the cmdrsp that contains the net.rcv
1206 * is the one we are using to repost, so copy the info we need from it.
1208 skb = cmdrsp->net.buf;
1211 devdata = netdev_priv(netdev);
// priv_lock is held across the counter updates and the
// enabled / enab_dis_acked test below.
1213 spin_lock_irqsave(&devdata->priv_lock, flags);
1214 atomic_dec(&devdata->num_rcvbuf_in_iovm);
1216 /* set length to how much was ACTUALLY received -
1217 * NOTE: rcv_done_len includes actual length of data rcvd
1220 skb->len = cmdrsp->net.rcv.rcv_done_len;
1222 /* update rcv stats - call it with priv_lock held */
1223 devdata->net_stats.rx_packets++;
1224 devdata->net_stats.rx_bytes += skb->len;
1226 /* test enabled while holding lock */
1227 if (!(devdata->enabled && devdata->enab_dis_acked)) {
1228 /* don't process it unless we're in enable mode and until
1229 * we've gotten an ACK saying the other end got our RCV enable
1231 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1232 repost_return(cmdrsp, devdata, skb, netdev);
1236 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1238 /* when skb was allocated, skb->dev, skb->data, skb->len and
1239 * skb->data_len were setup. AND, data has already put into the
1240 * skb (both first frag and in frags pages)
1241 * NOTE: firstfragslen is the amount of data in skb->data and that
1242 * which is not in nr_frags or frag_list. This is now simply
1243 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1244 * firstfrag & set data_len to show rest see if we have to chain
1247 /* do PRECAUTIONARY check */
// More data than one receive buffer holds: the response is expected to
// carry at least two buffers.
1248 if (skb->len > RCVPOST_BUF_SIZE) {
1249 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1250 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1251 dev_err(&devdata->netdev->dev,
1252 "repost_return failed");
1255 /* length rcvd is greater than firstfrag in this skb rcv buf */
1256 /* amount in skb->data */
1257 skb->tail += RCVPOST_BUF_SIZE;
1258 /* amount that will be in frag_list */
1259 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1261 /* data fits in this skb - no chaining - do
1262 * PRECAUTIONARY check
// Data fits in the first buffer: exactly one buffer is expected.
1265 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1266 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1267 dev_err(&devdata->netdev->dev,
1268 "repost_return failed");
1271 skb->tail += skb->len;
1272 /* nothing rcvd in frag_list */
// off = number of bytes now accounted for in the head skb.
1275 off = skb_tail_pointer(skb) - skb->data;
1277 /* amount we bumped tail by in the head skb
1278 * it is used to calculate the size of each chained skb below
1279 * it is also used to index into bufline to continue the copy
1280 * (for chansocktwopc)
1281 * if necessary chain the rcv skbs together.
1282 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1283 * chain the rest to that one.
1284 * - do PRECAUTIONARY check
1286 if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1287 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1288 dev_err(&devdata->netdev->dev, "repost_return failed");
1292 if (cmdrsp->net.rcv.numrcvbufs > 1) {
1293 /* chain the various rcv buffers into the skb's frag_list. */
1294 /* Note: off was initialized above */
1295 for (cc = 1, prev = NULL;
1296 cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1297 curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1299 /* start of list- set head */
1301 skb_shinfo(skb)->frag_list = curr;
1306 /* should we set skb->len and skb->data_len for each
1307 * buffer being chained??? can't hurt!
// Each chained buffer takes what is left after off, capped at
// RCVPOST_BUF_SIZE.
1309 currsize = min(skb->len - off,
1310 (unsigned int)RCVPOST_BUF_SIZE);
1311 curr->len = currsize;
1312 curr->tail += currsize;
1316 /* assert skb->len == off */
1317 if (skb->len != off) {
1318 netdev_err(devdata->netdev,
1319 "something wrong; skb->len:%d != off:%d\n",
1324 /* set up packet's protocol type using ethernet header - this
1325 * sets up skb->pkt_type & it also PULLS out the eth header
1327 skb->protocol = eth_type_trans(skb, netdev);
1330 skb->ip_summed = CHECKSUM_NONE;
// Software receive filter keyed on skb->pkt_type (set by eth_type_trans()
// above) and the netdev flags.
1333 /* accept all packets */
1334 if (netdev->flags & IFF_PROMISC)
1336 if (skb->pkt_type == PACKET_BROADCAST) {
1337 /* accept all broadcast packets */
1338 if (netdev->flags & IFF_BROADCAST)
1340 } else if (skb->pkt_type == PACKET_MULTICAST) {
1341 if ((netdev->flags & IFF_MULTICAST) &&
1342 (netdev_mc_count(netdev))) {
1343 struct netdev_hw_addr *ha;
1346 /* only accept multicast packets that we can
1347 * find in our multicast address list
1349 netdev_for_each_mc_addr(ha, netdev) {
1350 if (ether_addr_equal(eth->h_dest,
1356 /* accept pkt, dest matches a multicast addr */
1360 /* accept packet, h_dest must match vnic mac address */
1361 } else if (skb->pkt_type == PACKET_HOST) {
1363 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1364 /* something is not right */
1365 dev_err(&devdata->netdev->dev,
1366 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1367 netdev->name, eth->h_dest, netdev->dev_addr);
1369 /* drop packet - don't forward it up to OS */
1370 devdata->n_rcv_packets_not_accepted++;
1371 repost_return(cmdrsp, devdata, skb, netdev);
// NOTE(review): the comment that follows talks about netif_rx, but the call
// made here is netif_receive_skb().
1375 netif_receive_skb(skb);
1376 /* netif_rx returns various values, but "in practice most drivers
1377 * ignore the return value
1381 /* whether the packet got dropped or handled, the skb is freed by
1382 * kernel code, so we shouldn't free it. but we should repost a
1385 repost_return(cmdrsp, devdata, skb, netdev);
1389 /* devdata_initialize - initialize devdata structure
1390 * @devdata: visornic_devdata structure to initialize.
1391 * @dev: visorbus_device it belongs to.
1393 * Setup initial values for the visornic, based on channel and default values.
1395 * Return: A pointer to the devdata structure.
1397 static struct visornic_devdata *devdata_initialize(
1398 struct visornic_devdata *devdata,
1399 struct visor_device *dev)
// The incarnation id is taken from the current 64-bit jiffies counter.
1402 devdata->incarnation_id = get_jiffies_64();
1406 /* devdata_release - free up references in devdata
1407 * @devdata: Struct to clean up.
1409 static void devdata_release(struct visornic_devdata *devdata)
1411 kfree(devdata->rcvbuf);
1412 kfree(devdata->cmdrsp_rcv);
1413 kfree(devdata->xmit_cmdrsp);
1416 static const struct net_device_ops visornic_dev_ops = {
1417 .ndo_open = visornic_open,
1418 .ndo_stop = visornic_close,
1419 .ndo_start_xmit = visornic_xmit,
1420 .ndo_get_stats = visornic_get_stats,
1421 .ndo_change_mtu = visornic_change_mtu,
1422 .ndo_tx_timeout = visornic_xmit_timeout,
1423 .ndo_set_rx_mode = visornic_set_multi,
// debugfs read handler: formats per-device counters into a temporary kernel
// buffer, then copies the requested window to user space with
// simple_read_from_buffer().
1427 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1428 size_t len, loff_t *offset)
1430 ssize_t bytes_read = 0;
1432 struct visornic_devdata *devdata;
1433 struct net_device *dev;
// Scratch buffer sized by the read length; every scnprintf() below is
// bounded by the space still remaining (len - str_pos).
1438 vbuf = kzalloc(len, GFP_KERNEL);
1442 /* for each vnic channel dump out channel specific data */
// Walks the netdevs of the calling task's network namespace.
1444 for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1445 /* Only consider netdevs that are visornic, and are open */
// NOTE(review): the comment above says "open", but the test is on
// netif_queue_stopped(); confirm which devices are meant to be listed.
1446 if (dev->netdev_ops != &visornic_dev_ops ||
1447 (!netif_queue_stopped(dev)))
1450 devdata = netdev_priv(dev);
1451 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1452 "netdev = %s (0x%p), MAC Addr %pM\n",
1456 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457 "VisorNic Dev Info = 0x%p\n", devdata);
1458 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1459 " num_rcv_bufs = %d\n",
1460 devdata->num_rcv_bufs);
1461 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1462 " max_outstanding_next_xmits = %lu\n",
1463 devdata->max_outstanding_net_xmits);
1464 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1465 " upper_threshold_net_xmits = %lu\n",
1466 devdata->upper_threshold_net_xmits);
1467 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1468 " lower_threshold_net_xmits = %lu\n",
1469 devdata->lower_threshold_net_xmits);
1470 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1471 " queuefullmsg_logged = %d\n",
1472 devdata->queuefullmsg_logged);
// Channel statistics (devdata->chstat).
1473 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1474 " chstat.got_rcv = %lu\n",
1475 devdata->chstat.got_rcv);
1476 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1477 " chstat.got_enbdisack = %lu\n",
1478 devdata->chstat.got_enbdisack);
1479 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1480 " chstat.got_xmit_done = %lu\n",
1481 devdata->chstat.got_xmit_done);
1482 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483 " chstat.xmit_fail = %lu\n",
1484 devdata->chstat.xmit_fail);
1485 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1486 " chstat.sent_enbdis = %lu\n",
1487 devdata->chstat.sent_enbdis);
1488 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489 " chstat.sent_promisc = %lu\n",
1490 devdata->chstat.sent_promisc);
1491 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492 " chstat.sent_post = %lu\n",
1493 devdata->chstat.sent_post);
1494 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495 " chstat.sent_post_failed = %lu\n",
1496 devdata->chstat.sent_post_failed);
1497 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498 " chstat.sent_xmit = %lu\n",
1499 devdata->chstat.sent_xmit);
1500 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1501 " chstat.reject_count = %lu\n",
1502 devdata->chstat.reject_count);
1503 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1504 " chstat.extra_rcvbufs_sent = %lu\n",
1505 devdata->chstat.extra_rcvbufs_sent);
// Receive and repost counters (the n_rcv* and repost fields are updated in
// repost_return()).
1506 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507 " n_rcv0 = %lu\n", devdata->n_rcv0);
1508 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509 " n_rcv1 = %lu\n", devdata->n_rcv1);
1510 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511 " n_rcv2 = %lu\n", devdata->n_rcv2);
1512 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513 " n_rcvx = %lu\n", devdata->n_rcvx);
1514 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1515 " num_rcvbuf_in_iovm = %d\n",
1516 atomic_read(&devdata->num_rcvbuf_in_iovm));
1517 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518 " alloc_failed_in_if_needed_cnt = %lu\n",
1519 devdata->alloc_failed_in_if_needed_cnt);
1520 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1521 " alloc_failed_in_repost_rtn_cnt = %lu\n",
1522 devdata->alloc_failed_in_repost_rtn_cnt);
1523 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524 * " inner_loop_limit_reached_cnt = %lu\n",
1525 * devdata->inner_loop_limit_reached_cnt);
1527 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1528 " found_repost_rcvbuf_cnt = %lu\n",
1529 devdata->found_repost_rcvbuf_cnt);
1530 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1531 " repost_found_skb_cnt = %lu\n",
1532 devdata->repost_found_skb_cnt);
1533 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1534 " n_repost_deficit = %lu\n",
1535 devdata->n_repost_deficit);
1536 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1537 " bad_rcv_buf = %lu\n",
1538 devdata->bad_rcv_buf);
1539 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1540 " n_rcv_packets_not_accepted = %lu\n",
1541 devdata->n_rcv_packets_not_accepted);
1542 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1543 " interrupts_rcvd = %llu\n",
1544 devdata->interrupts_rcvd);
1545 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1546 " interrupts_notme = %llu\n",
1547 devdata->interrupts_notme);
1548 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1549 " interrupts_disabled = %llu\n",
1550 devdata->interrupts_disabled);
1551 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1552 " busy_cnt = %llu\n",
1554 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1555 " flow_control_upper_hits = %llu\n",
1556 devdata->flow_control_upper_hits);
1557 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1558 " flow_control_lower_hits = %llu\n",
1559 devdata->flow_control_lower_hits);
1560 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561 " netif_queue = %s\n",
1562 netif_queue_stopped(devdata->netdev) ?
1563 "stopped" : "running");
1564 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1565 " xmits_outstanding = %lu\n",
1566 devdata_xmits_outstanding(devdata));
// str_pos is the number of valid bytes in vbuf; the helper applies the
// file offset and copies at most len bytes to the user buffer.
1569 bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
// Driver-wide debugfs directory; assigned in visornic_init().
1574 static struct dentry *visornic_debugfs_dir;
// File operations for the "info" debugfs file: only a read handler is set.
1575 static const struct file_operations debugfs_info_fops = {
1576 .read = info_debugfs_read,
1579 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1580 * @devdata: Visornic device.
// Retries allocation for receive buffers that earlier attempts could not
// allocate (tracked in num_rcv_bufs_could_not_alloc) and posts each new
// buffer with post_skb(), reusing devdata->cmdrsp_rcv as the message.
1582 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1585 struct net_device *netdev;
1586 struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1587 int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1590 /* don't do this until vnic is marked ready */
1591 if (!(devdata->enabled && devdata->enab_dis_acked))
1594 netdev = devdata->netdev;
1595 rcv_bufs_allocated = 0;
1596 /* this code is trying to prevent getting stuck here forever,
1597 * but still retry it if you cant allocate them all this time.
// The number of passes is bounded by the shortfall sampled here.
1599 cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1600 while (cur_num_rcv_bufs_to_alloc > 0) {
1601 cur_num_rcv_bufs_to_alloc--;
1602 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1603 if (devdata->rcvbuf[i])
1605 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1606 if (!devdata->rcvbuf[i]) {
1607 devdata->alloc_failed_in_if_needed_cnt++;
1610 rcv_bufs_allocated++;
1611 err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
// Presumably the post_skb() failure path: the buffer is freed and its slot
// cleared. TODO confirm against the full source.
1613 kfree_skb(devdata->rcvbuf[i]);
1614 devdata->rcvbuf[i] = NULL;
1617 devdata->chstat.extra_rcvbufs_sent++;
// Buffers allocated in this call no longer count as missing.
1620 devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1623 /* drain_resp_queue - drains and ignores all messages from the resp queue
1624 * @cmdrsp: IO channel command response message.
1625 * @devdata: Visornic device to drain.
1627 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1628 struct visornic_devdata *devdata)
// Keeps removing signals from the device's channel for as long as
// visorchannel_signalremove() returns 0.
1630 while (!visorchannel_signalremove(devdata->dev->visorchannel,
1636 /* service_resp_queue - drain the response queue
1637 * @cmdrsp: IO channel command response message.
1638 * @devdata: Visornic device to drain.
1642 * Drain the response queue of any responses from the IO Partition. Process the
1643 * responses as we get them.
// @rx_work_done: in/out count of received skbs, advanced by visornic_rx().
// @budget: the loop stops once *rx_work_done reaches this value.
1645 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1646 struct visornic_devdata *devdata,
1647 int *rx_work_done, int budget)
1649 unsigned long flags;
1650 struct net_device *netdev;
1652 while (*rx_work_done < budget) {
1653 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1657 if (visorchannel_signalremove(devdata->dev->visorchannel,
// Dispatch on the response type carried in the message.
1662 switch (cmdrsp->net.type) {
1664 devdata->chstat.got_rcv++;
1665 /* process incoming packet */
1666 *rx_work_done += visornic_rx(cmdrsp);
// Transmit completion: counters and the queue-wake decision are handled
// under priv_lock; the transmitted skb is unlinked from xmitbufhead and
// freed after the lock is dropped.
1669 spin_lock_irqsave(&devdata->priv_lock, flags);
1670 devdata->chstat.got_xmit_done++;
1671 if (cmdrsp->net.xmtdone.xmt_done_result)
1672 devdata->chstat.xmit_fail++;
1673 /* only call queue wake if we stopped it */
1674 netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1675 /* ASSERT netdev == vnicinfo->netdev; */
1676 if (netdev == devdata->netdev &&
1677 netif_queue_stopped(netdev)) {
1678 /* check if we have crossed the lower watermark
1679 * for netif_wake_queue()
1681 if (vnic_hit_low_watermark
1683 devdata->lower_threshold_net_xmits)) {
1684 /* enough NET_XMITs completed
1685 * so can restart netif queue
1687 netif_wake_queue(netdev);
1688 devdata->flow_control_lower_hits++;
1691 skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1692 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1693 kfree_skb(cmdrsp->net.buf);
// Enable/disable acknowledgement: the netdev pointer travels back in the
// message's enbdis.context field.
1695 case NET_RCV_ENBDIS_ACK:
1696 devdata->chstat.got_enbdisack++;
1697 netdev = (struct net_device *)
1698 cmdrsp->net.enbdis.context;
1699 spin_lock_irqsave(&devdata->priv_lock, flags);
1700 devdata->enab_dis_acked = 1;
1701 spin_unlock_irqrestore(&devdata->priv_lock, flags);
// If a server state change was pending while the server was down, clear
// both flags and bring queue and carrier back up.
1703 if (devdata->server_down &&
1704 devdata->server_change_state) {
1705 /* Inform Linux that the link is up */
1706 devdata->server_down = false;
1707 devdata->server_change_state = false;
1708 netif_wake_queue(netdev);
1709 netif_carrier_on(netdev);
// Connection status: devdata->enabled mirrors the reported enable value,
// and queue and carrier are started or stopped to match.
1712 case NET_CONNECT_STATUS:
1713 netdev = devdata->netdev;
1714 if (cmdrsp->net.enbdis.enable == 1) {
1715 spin_lock_irqsave(&devdata->priv_lock, flags);
1716 devdata->enabled = cmdrsp->net.enbdis.enable;
1717 spin_unlock_irqrestore(&devdata->priv_lock,
1719 netif_wake_queue(netdev);
1720 netif_carrier_on(netdev);
1722 netif_stop_queue(netdev);
1723 netif_carrier_off(netdev);
1724 spin_lock_irqsave(&devdata->priv_lock, flags);
1725 devdata->enabled = cmdrsp->net.enbdis.enable;
1726 spin_unlock_irqrestore(&devdata->priv_lock,
1733 /* cmdrsp is now available for reuse */
1737 static int visornic_poll(struct napi_struct *napi, int budget)
1739 struct visornic_devdata *devdata = container_of(napi,
1740 struct visornic_devdata,
1744 send_rcv_posts_if_needed(devdata);
1745 service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1747 /* If there aren't any more packets to receive stop the poll */
1748 if (rx_count < budget)
1749 napi_complete_done(napi, rx_count);
1754 /* poll_for_irq - checks the status of the response queue
1755 * @v: Void pointer to the visronic devdata struct.
1757 * Main function of the vnic_incoming thread. Periodically check the response
1758 * queue and drain it if needed.
1760 static void poll_for_irq(struct timer_list *t)
1762 struct visornic_devdata *devdata = from_timer(devdata, t,
1765 if (!visorchannel_signalempty(
1766 devdata->dev->visorchannel,
1767 IOCHAN_FROM_IOPART))
1768 napi_schedule(&devdata->napi);
1770 atomic_set(&devdata->interrupt_rcvd, 0);
1772 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1775 /* visornic_probe - probe function for visornic devices
1776 * @dev: The visor device discovered.
1778 * Called when visorbus discovers a visornic device on its bus. It creates a new
1779 * visornic ethernet adapter.
1781 * Return: 0 on success, or negative integer on error.
// Sequence: allocate the netdev, read MAC address / receive-buffer count /
// MTU / feature flags from the channel, allocate the receive table and the
// two cmdrsp buffers, set up NAPI and the poll timer, then register the
// netdev and create its debugfs directory.
1783 static int visornic_probe(struct visor_device *dev)
1785 struct visornic_devdata *devdata = NULL;
1786 struct net_device *netdev = NULL;
1788 int channel_offset = 0;
// devdata lives in the netdev's private area, sized here.
1791 netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1793 dev_err(&dev->device,
1794 "%s alloc_etherdev failed\n", __func__);
1798 netdev->netdev_ops = &visornic_dev_ops;
1799 netdev->watchdog_timeo = 5 * HZ;
1800 SET_NETDEV_DEV(netdev, &dev->device);
1802 /* Get MAC address from channel and read it into the device. */
1803 netdev->addr_len = ETH_ALEN;
1804 channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1805 err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1808 dev_err(&dev->device,
1809 "%s failed to get mac addr from chan (%d)\n",
1811 goto cleanup_netdev;
1814 devdata = devdata_initialize(netdev_priv(netdev), dev);
1816 dev_err(&dev->device,
1817 "%s devdata_initialize failed\n", __func__);
1819 goto cleanup_netdev;
1821 /* don't trust messages laying around in the channel */
1822 drain_resp_queue(devdata->cmdrsp, devdata);
1824 devdata->netdev = netdev;
1825 dev_set_drvdata(&dev->device, devdata);
1826 init_waitqueue_head(&devdata->rsp_queue);
1827 spin_lock_init(&devdata->priv_lock);
1829 devdata->enabled = 0;
1830 atomic_set(&devdata->usage, 1);
1832 /* Setup rcv bufs */
1833 channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1834 err = visorbus_read_channel(dev, channel_offset,
1835 &devdata->num_rcv_bufs, 4);
1837 dev_err(&dev->device,
1838 "%s failed to get #rcv bufs from chan (%d)\n",
1840 goto cleanup_netdev;
// One sk_buff pointer per receive buffer, zero-initialised by kcalloc().
1843 devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1844 sizeof(struct sk_buff *), GFP_KERNEL);
1845 if (!devdata->rcvbuf) {
1847 goto cleanup_netdev;
1850 /* set the net_xmit outstanding threshold
1851 * always leave two slots open but you should have 3 at a minimum
1852 * note that max_outstanding_net_xmits must be > 0
// The three limits are derived from num_rcv_bufs with floors of 3, 2 and 1.
1854 devdata->max_outstanding_net_xmits =
1855 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1856 devdata->upper_threshold_net_xmits =
1857 max_t(unsigned long,
1858 2, (devdata->max_outstanding_net_xmits - 1));
1859 devdata->lower_threshold_net_xmits =
1860 max_t(unsigned long,
1861 1, (devdata->max_outstanding_net_xmits / 2));
1863 skb_queue_head_init(&devdata->xmitbufhead);
1865 /* create a cmdrsp we can use to post and unpost rcv buffers */
// NOTE(review): these two allocations use GFP_ATOMIC while the rcvbuf
// array above uses GFP_KERNEL; confirm atomic allocation is needed here.
1866 devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1867 if (!devdata->cmdrsp_rcv) {
1869 goto cleanup_rcvbuf;
1871 devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1872 if (!devdata->xmit_cmdrsp) {
1874 goto cleanup_cmdrsp_rcv;
1876 INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1877 devdata->server_down = false;
1878 devdata->server_change_state = false;
1880 /*set the default mtu */
1881 channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1882 err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1884 dev_err(&dev->device,
1885 "%s failed to get mtu from chan (%d)\n",
1887 goto cleanup_xmit_cmdrsp;
1890 /* TODO: Setup Interrupt information */
1891 /* Let's start our threads to get responses */
1892 netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1894 timer_setup(&devdata->irq_poll_timer, poll_for_irq, 0);
1895 /* Note: This time has to start running before the while
1896 * loop below because the napi routine is responsible for
1897 * setting enab_dis_acked
// NOTE(review): mod_timer() takes an absolute expiry in jiffies, but
// msecs_to_jiffies(2) alone is an interval; confirm whether
// jiffies + msecs_to_jiffies(2) was intended.
1899 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
// Read-modify-write of the channel header's 8-byte feature word to
// advertise polling mode and enhanced receive-buffer checking.
1901 channel_offset = offsetof(struct visor_io_channel,
1902 channel_header.features);
1903 err = visorbus_read_channel(dev, channel_offset, &features, 8);
1905 dev_err(&dev->device,
1906 "%s failed to get features from chan (%d)\n",
1908 goto cleanup_napi_add;
1911 features |= VISOR_CHANNEL_IS_POLLING;
1912 features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1913 err = visorbus_write_channel(dev, channel_offset, &features, 8);
1915 dev_err(&dev->device,
1916 "%s failed to set features in chan (%d)\n",
1918 goto cleanup_napi_add;
1921 /* Note: Interrupts have to be enable before the while
1922 * loop below because the napi routine is responsible for
1923 * setting enab_dis_acked
1925 visorbus_enable_channel_interrupts(dev);
1927 err = register_netdev(netdev);
1929 dev_err(&dev->device,
1930 "%s register_netdev failed (%d)\n", __func__, err);
1931 goto cleanup_napi_add;
1934 /* create debug/sysfs directories */
1935 devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1936 visornic_debugfs_dir);
// NOTE(review): newer kernels report debugfs_create_dir() failure with an
// ERR_PTR rather than NULL; confirm this check for the target kernel.
1937 if (!devdata->eth_debugfs_dir) {
1938 dev_err(&dev->device,
1939 "%s debugfs_create_dir %s failed\n",
1940 __func__, netdev->name);
1942 goto cleanup_register_netdev;
1945 dev_info(&dev->device, "%s success netdev=%s\n",
1946 __func__, netdev->name);
// Error unwinding: resources are released in the reverse order of setup.
1949 cleanup_register_netdev:
1950 unregister_netdev(netdev);
1953 del_timer_sync(&devdata->irq_poll_timer);
1954 netif_napi_del(&devdata->napi);
1956 cleanup_xmit_cmdrsp:
1957 kfree(devdata->xmit_cmdrsp);
1960 kfree(devdata->cmdrsp_rcv);
1963 kfree(devdata->rcvbuf);
1966 free_netdev(netdev);
1970 /* host_side_disappeared - IO Partition is gone
1971 * @devdata: Device object.
1973 * IO partition servicing this device is gone; do cleanup.
1975 static void host_side_disappeared(struct visornic_devdata *devdata)
1977 unsigned long flags;
1979 spin_lock_irqsave(&devdata->priv_lock, flags);
1980 /* indicate device destroyed */
1981 devdata->dev = NULL;
1982 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1985 /* visornic_remove - called when visornic dev goes away
1986 * @dev: Visornic device that is being removed.
1988 * Called when DEVICE_DESTROY gets called to remove device.
// Tear-down order: cancel the timeout-reset work, remove the debugfs
// directory, unregister the netdev, stop the poll timer and NAPI, clear the
// driver data pointer, release devdata's buffers and free the netdev.
1990 static void visornic_remove(struct visor_device *dev)
1992 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1993 struct net_device *netdev;
1994 unsigned long flags;
1997 dev_err(&dev->device, "%s no devdata\n", __func__);
// going_away is tested and set under priv_lock, so a removal that is
// already in progress is detected and logged.
2000 spin_lock_irqsave(&devdata->priv_lock, flags);
2001 if (devdata->going_away) {
2002 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2003 dev_err(&dev->device, "%s already being removed\n", __func__);
2006 devdata->going_away = true;
2007 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2008 netdev = devdata->netdev;
2010 dev_err(&dev->device, "%s not net device\n", __func__);
2014 /* going_away prevents new items being added to the workqueues */
2015 cancel_work_sync(&devdata->timeout_reset);
2017 debugfs_remove_recursive(devdata->eth_debugfs_dir);
2018 /* this will call visornic_close() */
2019 unregister_netdev(netdev);
2021 del_timer_sync(&devdata->irq_poll_timer);
2022 netif_napi_del(&devdata->napi);
2024 dev_set_drvdata(&dev->device, NULL);
2025 host_side_disappeared(devdata);
// devdata is the netdev's private area (netdev_priv() in visornic_probe),
// so free_netdev() must come after devdata_release().
2026 devdata_release(devdata);
2027 free_netdev(netdev);
2030 /* visornic_pause - called when IO Part disappears
2031 * @dev: Visornic device that is being serviced.
2032 * @complete_func: Call when finished.
2034 * Called when the IO Partition has gone down. Need to free up resources and
2035 * wait for IO partition to come back. Mark link as down and don't attempt any
2036 * DMA. When we have freed memory, call the complete_func so that Command knows
2037 * we are done. If we don't call complete_func, the IO Partition will never
2040 * Return: 0 on success.
2042 static int visornic_pause(struct visor_device *dev,
2043 visorbus_state_complete_func complete_func)
2045 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2047 visornic_serverdown(devdata, complete_func);
2051 /* visornic_resume - called when IO Partition has recovered
2052 * @dev: Visornic device that is being serviced.
2053 * @compelte_func: Call when finished.
// NOTE(review): the parameter documented above as "@compelte_func" is
// named complete_func in the signature.
2055 * Called when the IO partition has recovered. Re-establish connection to the IO
2056 * Partition and set the link up. Okay to do DMA again.
2058 * Returns 0 for success, negative integer on error.
2060 static int visornic_resume(struct visor_device *dev,
2061 visorbus_state_complete_func complete_func)
2063 struct visornic_devdata *devdata;
2064 struct net_device *netdev;
2065 unsigned long flags;
2067 devdata = dev_get_drvdata(&dev->device);
2069 dev_err(&dev->device, "%s no devdata\n", __func__);
2073 netdev = devdata->netdev;
// The server_change_state / server_down checks and the flag update are done
// under priv_lock; every exit path drops the lock first.
2075 spin_lock_irqsave(&devdata->priv_lock, flags);
2076 if (devdata->server_change_state) {
2077 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2078 dev_err(&dev->device, "%s server already changing state\n",
2082 if (!devdata->server_down) {
2083 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2084 dev_err(&dev->device, "%s server not down\n", __func__);
2085 complete_func(dev, 0);
2088 devdata->server_change_state = true;
2089 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2091 /* Must transition channel to ATTACHED state BEFORE
2092 * we can start using the device again.
2093 * TODO: State transitions
// NOTE(review): mod_timer() takes an absolute expiry in jiffies, but
// msecs_to_jiffies(2) alone is an interval; confirm whether
// jiffies + msecs_to_jiffies(2) was intended.
2095 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
2101 complete_func(dev, 0);
2105 /* This is used to tell the visorbus driver which types of visor devices
2106 * we support, and what functions to call when a visor device that we support
2107 * is attached or removed.
// Registered with visorbus in visornic_init() and unregistered in
// visornic_cleanup().
2109 static struct visor_driver visornic_driver = {
2111 .owner = THIS_MODULE,
2112 .channel_types = visornic_channel_types,
2113 .probe = visornic_probe,
2114 .remove = visornic_remove,
2115 .pause = visornic_pause,
2116 .resume = visornic_resume,
// No channel interrupt handler is installed; visornic_probe() sets the
// VISOR_CHANNEL_IS_POLLING feature and starts a poll timer instead.
2117 .channel_interrupt = NULL,
2120 /* visornic_init - init function
2122 * Init function for the visornic driver. Do initial driver setup and wait
2125 * Return: 0 on success, negative integer on error.
2127 static int visornic_init(void)
2131 visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2133 debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2134 &debugfs_info_fops);
2135 debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
2136 &debugfs_enable_ints_fops);
2138 err = visorbus_register_visor_driver(&visornic_driver);
2140 debugfs_remove_recursive(visornic_debugfs_dir);
2145 /* visornic_cleanup - driver exit routine
2147 * Unregister driver from the bus and free up memory.
2149 static void visornic_cleanup(void)
2151 visorbus_unregister_visor_driver(&visornic_driver);
2152 debugfs_remove_recursive(visornic_debugfs_dir);
// Module entry and exit hooks.
2155 module_init(visornic_init);
2156 module_exit(visornic_cleanup);
// Module metadata.
2158 MODULE_AUTHOR("Unisys");
2159 MODULE_LICENSE("GPL");
2160 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");