/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 */
/* This driver lives in a spar partition, and registers to handle ethernet
 * IO channels from the visorbus driver.  It creates netdev devices,
 * forwards transmits to the IO Partition via the IO channel, and accepts
 * receives from the IO Partition via the same channel.
 */
#include <linux/debugfs.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/kthread.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include "visorbus.h"
#include "iochannel.h"
#define VISORNIC_INFINITE_RSP_WAIT 0	/* 0 = wait forever for a response */
/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
 *         = 163840 bytes
 */
#define MAX_BUF 163840
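
/* Per-poll packet budget for NAPI: visornic_poll() processes at most this
 * many receive responses per invocation (64 is the conventional default
 * NAPI weight).
 */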
#define NAPI_WEIGHT 64
/* GUIDs for the channel types supported by this driver. */
static struct visor_channeltype_descriptor visornic_channel_types[] = {
	/* Note that the only channel type we expect to be reported by the
	 * bus driver is the SPAR_VNIC channel.
	 */
	{ SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
	{ NULL_UUID_LE, NULL }
};
MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
/* FIXME XXX: This next line of code must be fixed and removed before
 * acceptance into the 'normal' part of the kernel.  It is only here as a
 * placeholder to get module autoloading functionality working for
 * visorbus.  Code must be added to scripts/mod/file2alias.c, etc., to get
 * this working properly.
 */
MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
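
/* Per-device channel activity counters; one instance is embedded in each
 * visornic_devdata and dumped through the "info" debugfs file below.
 */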
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	unsigned long sent_post;
	unsigned long sent_post_failed;
	unsigned long sent_xmit;
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};
struct visornic_devdata {
	/* 0 = disabled, 1 = enabled to receive */
	unsigned short enabled;
	/* NET_RCV_ENABLE/DISABLE acked by IOPART */
	unsigned short enab_dis_acked;

	struct visor_device *dev;
	struct net_device *netdev;
	struct net_device_stats net_stats;
	atomic_t interrupt_rcvd;
	wait_queue_head_t rsp_queue;
	struct sk_buff **rcvbuf;
	/* incarnation_id lets IOPART know about re-birth */
	u64 incarnation_id;
	/* flags as they were prior to set_multicast_list */
	unsigned short old_flags;
	atomic_t usage;	/* count of users */

	/* number of rcv buffers the vnic will post */
	int num_rcv_bufs;
	int num_rcv_bufs_could_not_alloc;
	atomic_t num_rcvbuf_in_iovm;
	unsigned long alloc_failed_in_if_needed_cnt;
	unsigned long alloc_failed_in_repost_rtn_cnt;

	/* absolute max number of outstanding xmits - should never hit this */
	unsigned long max_outstanding_net_xmits;
	/* high water mark for calling netif_stop_queue() */
	unsigned long upper_threshold_net_xmits;
	/* low water mark for calling netif_wake_queue() */
	unsigned long lower_threshold_net_xmits;
	/* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
	struct sk_buff_head xmitbufhead;

	visorbus_state_complete_func server_down_complete_func;
	struct work_struct timeout_reset;
	/* cmdrsp_rcv is used for posting/unposting rcv buffers */
	struct uiscmdrsp *cmdrsp_rcv;
	/* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
	struct uiscmdrsp *xmit_cmdrsp;

	bool server_down;		/* IOPART is down */
	bool server_change_state;	/* Processing SERVER_CHANGESTATE msg */
	bool going_away;		/* device is being torn down */
	struct dentry *eth_debugfs_dir;
	u64 interrupts_rcvd;
	u64 interrupts_notme;
	u64 interrupts_disabled;
	u64 busy_cnt;
	spinlock_t priv_lock;	/* spinlock to access devdata structures */

	/* flow control counter */
	u64 flow_control_upper_hits;
	u64 flow_control_lower_hits;

	/* debug counters */
	unsigned long n_rcv0;			/* # rcvs of 0 buffers */
	unsigned long n_rcv1;			/* # rcvs of 1 buffers */
	unsigned long n_rcv2;			/* # rcvs of 2 buffers */
	unsigned long n_rcvx;			/* # rcvs of >2 buffers */
	unsigned long found_repost_rcvbuf_cnt;	/* # repost_rcvbuf_cnt */
	unsigned long repost_found_skb_cnt;	/* # of found the skb */
	unsigned long n_repost_deficit;		/* # of lost rcv buffers */
	unsigned long bad_rcv_buf;	/* # of unknown rcv skb not freed */
	unsigned long n_rcv_packets_not_accepted; /* # bogus rcv packets */

	int queuefullmsg_logged;
	struct chanstat chstat;
	struct timer_list irq_poll_timer;
	struct napi_struct napi;
	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
};
/* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
static u16
add_physinfo_entries(u64 inp_pfn, u16 inp_off, u32 inp_len, u16 index,
		     u16 max_pi_arr_entries, struct phys_info pi_arr[])
{
	u32 len;
	u16 i, firstlen;

	firstlen = PI_PAGE_SIZE - inp_off;
	if (inp_len <= firstlen) {
		/* The input entry spans only one page - add as is. */
		if (index >= max_pi_arr_entries)
			return 0;
		pi_arr[index].pi_pfn = inp_pfn;
		pi_arr[index].pi_off = (u16)inp_off;
		pi_arr[index].pi_len = (u16)inp_len;
		return index + 1;
	}

	/* This entry spans multiple pages. */
	for (len = inp_len, i = 0; len;
	     len -= pi_arr[index + i].pi_len, i++) {
		if (index + i >= max_pi_arr_entries)
			return 0;
		pi_arr[index + i].pi_pfn = inp_pfn + i;
		if (i == 0) {
			pi_arr[index].pi_off = inp_off;
			pi_arr[index].pi_len = firstlen;
		} else {
			pi_arr[index + i].pi_off = 0;
			pi_arr[index + i].pi_len =
			    (u16)MINNUM(len, (u32)PI_PAGE_SIZE);
		}
	}
	return index + i;
}
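
/* Worked example (assuming PI_PAGE_SIZE is 4096): an input fragment with
 * inp_len = 6000 and inp_off = 96 is split into firstlen = 4096 - 96 =
 * 4000 bytes on the first page, then MINNUM(2000, 4096) = 2000 bytes at
 * offset 0 on the next pfn, consuming two pi_arr entries.
 */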
/*
 * visor_copy_fragsinfo_from_skb(
 * @skb: skbuff that we are pulling the frags from
 * @firstfraglen: length of first fragment in skb
 * @frags_max: max len of frags array
 * @frags: frags array filled in on output
 *
 * Copy the fragment list in the SKB to a phys_info
 * array that the IOPART understands.
 * Return value indicates number of entries filled in frags
 * Negative values indicate an error.
 */
static int
visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
			      unsigned int frags_max,
			      struct phys_info frags[])
{
	unsigned int count = 0, frag, size, offset = 0, numfrags;
	unsigned int total_count;

	numfrags = skb_shinfo(skb)->nr_frags;
	/* Compute the number of fragments this skb has, and if it's more
	 * than the frag array can hold, linearize the skb.
	 */
	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
	if (firstfraglen % PI_PAGE_SIZE)
		total_count++;

	if (total_count > frags_max) {
		if (skb_linearize(skb))
			return -EINVAL;
		numfrags = skb_shinfo(skb)->nr_frags;
	}
	while (firstfraglen) {
		if (count == frags_max)
			return -EINVAL;

		frags[count].pi_pfn =
			page_to_pfn(virt_to_page(skb->data + offset));
		frags[count].pi_off =
			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
		size = min_t(unsigned int, firstfraglen,
			     PI_PAGE_SIZE - frags[count].pi_off);

		/* can take smallest of firstfraglen (what's left) OR
		 * bytes left in the page
		 */
		frags[count].pi_len = size;
		firstfraglen -= size;
		offset += size;
		count++;
	}
	if ((count + numfrags) > frags_max)
		return -EINVAL;
	for (frag = 0; frag < numfrags; frag++) {
		count = add_physinfo_entries(page_to_pfn(
				skb_frag_page(&skb_shinfo(skb)->frags[frag])),
					     skb_shinfo(skb)->frags[frag].
					     page_offset,
					     skb_shinfo(skb)->frags[frag].
					     size, count, frags_max, frags);
		/* add_physinfo_entries only returns
		 * zero if the frags array is out of room
		 * That should never happen because we
		 * fail above, if count + numfrags > frags_max.
		 */
		if (!count)
			return -EINVAL;
	}
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *skbinlist;
		int c;

		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
		     skbinlist = skbinlist->next) {
			c = visor_copy_fragsinfo_from_skb(skbinlist,
							  skbinlist->len -
							  skbinlist->data_len,
							  frags_max - count,
							  &frags[count]);
			if (c < 0)
				return c;
			count += c;
		}
	}
	return count;
}
static ssize_t enable_ints_write(struct file *file,
				 const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	/* Don't want to break ABI here by having a debugfs
	 * file that no longer exists or is writable, so
	 * let's just make this a vestigial function
	 */
	return count;
}

static const struct file_operations debugfs_enable_ints_fops = {
	.write = enable_ints_write,
};
/*
 * visornic_serverdown_complete - IOPART went down, pause device
 * @devdata: visornic device to pause
 *
 * The IO partition has gone down and we need to do some cleanup
 * for when it comes back. Treat the IO partition as the link
 * being down.
 */
static void
visornic_serverdown_complete(struct visornic_devdata *devdata)
{
	struct net_device *netdev;

	netdev = devdata->netdev;

	/* Stop polling for interrupts */
	del_timer_sync(&devdata->irq_poll_timer);

	rtnl_lock();
	dev_close(netdev);
	rtnl_unlock();

	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
	devdata->chstat.sent_xmit = 0;
	devdata->chstat.got_xmit_done = 0;

	if (devdata->server_down_complete_func)
		(*devdata->server_down_complete_func)(devdata->dev, 0);

	devdata->server_down = true;
	devdata->server_change_state = false;
	devdata->server_down_complete_func = NULL;
}
/*
 * visornic_serverdown - Command has notified us that IOPART is down
 * @devdata: device that is being managed by IOPART
 *
 * Schedule the work needed to handle the server down request. Make
 * sure we haven't already handled the server change state event.
 * Returns 0 if we scheduled the work, negative error code on error.
 */
static int
visornic_serverdown(struct visornic_devdata *devdata,
		    visorbus_state_complete_func complete_func)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		dev_dbg(&devdata->dev->device, "%s changing state\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->server_down) {
		dev_dbg(&devdata->dev->device, "%s already down\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->going_away) {
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		err = -ENODEV;
		goto err_unlock;
	}
	devdata->server_change_state = true;
	devdata->server_down_complete_func = complete_func;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	visornic_serverdown_complete(devdata);
	return 0;

err_unlock:
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	return err;
}
/*
 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
 * @netdev: network adapter the rcv bufs are attached to.
 *
 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 * so that it can write rcv data into our memory space.
 * Return pointer to sk_buff
 */
static struct sk_buff *
alloc_rcv_buf(struct net_device *netdev)
{
	struct sk_buff *skb;

	/* NOTE: the first fragment in each rcv buffer is pointed to by
	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
	 * in length, so the first frag is large enough to hold 1514.
	 */
	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
	if (!skb)
		return NULL;
	skb->dev = netdev;
	/* current value of mtu doesn't come into play here; large
	 * packets will just end up using multiple rcv buffers all of
	 * RCVPOST_BUF_SIZE in length.
	 */
	skb->len = RCVPOST_BUF_SIZE;
	/* alloc_skb already zeroes it out for clarification. */
	skb->data_len = 0;
	return skb;
}
/*
 * post_skb - post a skb to the IO Partition.
 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
 * @devdata: visornic_devdata to post the skb to
 * @skb: skb to give to the IO partition
 *
 * Send the skb to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
post_skb(struct uiscmdrsp *cmdrsp,
	 struct visornic_devdata *devdata, struct sk_buff *skb)
{
	int err;

	cmdrsp->net.buf = skb;
	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
	cmdrsp->net.rcvpost.frag.pi_off =
		(unsigned long)skb->data & PI_PAGE_MASK;
	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
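
	/* unique_num lets the IO Partition tell rcv buffers posted by this
	 * incarnation of the device apart from stale buffers posted by an
	 * earlier incarnation (see incarnation_id in visornic_devdata).
	 */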
	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
		return -EINVAL;

	cmdrsp->net.type = NET_RCV_POST;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART, cmdrsp);
	if (err) {
		devdata->chstat.sent_post_failed++;
		return err;
	}

	atomic_inc(&devdata->num_rcvbuf_in_iovm);
	devdata->chstat.sent_post++;
	return 0;
}
/*
 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
 * @netdev: netdevice we are enabling/disabling, used as context
 *	    return value
 * @state: enable = 1/disable = 0
 * @devdata: visornic device we are enabling/disabling
 *
 * Send the enable/disable message to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
send_enbdis(struct net_device *netdev, int state,
	    struct visornic_devdata *devdata)
{
	int err;

	devdata->cmdrsp_rcv->net.enbdis.enable = state;
	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART,
					devdata->cmdrsp_rcv);
	if (err)
		return err;
	devdata->chstat.sent_enbdis++;
	return 0;
}
/*
 * visornic_disable_with_timeout - Disable network adapter
 * @netdev: netdevice to disable
 * @timeout: timeout to wait for disable
 *
 * Disable the network adapter and inform the IO Partition that we
 * are disabled, reclaim memory from rcv bufs.
 * Returns 0 on success, negative on failure of IO Partition
 * responding.
 */
static int
visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int i;
	unsigned long flags;
	int wait = 0;
	int err;

	/* send a msg telling the other end we are stopping incoming pkts */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 0;
	devdata->enab_dis_acked = 0; /* must wait for ack */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send disable and wait for ack -- don't hold lock when sending
	 * disable because if the queue is full, insert might sleep.
	 * If an error occurs, don't wait for the timeout.
	 */
	err = send_enbdis(netdev, 0, devdata);
	if (err)
		return err;

	/* wait for ack to arrive before we try to free rcv buffers
	 * NOTE: the other end automatically unposts the rcv buffers
	 * when it gets a disable.
	 */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	/* Wait for usage to go to 1 (no other users) before freeing
	 * rcv buffers
	 */
	if (atomic_read(&devdata->usage) > 1) {
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			schedule_timeout(msecs_to_jiffies(10));
			spin_lock_irqsave(&devdata->priv_lock, flags);
			if (atomic_read(&devdata->usage))
				break;
		}
	}

	/* we've set enabled to 0, so we can give up the lock. */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* stop the transmit queue so nothing more can be transmitted */
	netif_stop_queue(netdev);

	napi_disable(&devdata->napi);
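
	/* skbs queued on xmitbufhead are awaiting NET_XMIT_DONE responses
	 * from the IO Partition; with the partition down those acks will
	 * never arrive, so drop the queued skbs here.
	 */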
	skb_queue_purge(&devdata->xmitbufhead);

	/* Free rcv buffers - other end has automatically unposted them on
	 * disable
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		if (devdata->rcvbuf[i]) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
	}

	return 0;
}
/*
 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
 * @netdev: struct netdevice
 * @devdata: visornic_devdata
 *
 * Allocate rcv buffers and post them to the IO Partition.
 * Return 0 for success, and negative for failure.
 */
static int
init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
{
	int i, j, count, err;

	/* allocate fixed number of receive buffers to post to uisnic
	 * post receive buffers after we've allocated a required amount
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
		if (!devdata->rcvbuf[i])
			break; /* if we failed to allocate one let us stop */
	}
	if (i == 0) /* couldn't even allocate one -- bail out */
		return -ENOMEM;
	count = i;

	/* Ensure we can alloc 2/3rd of the requested number of buffers.
	 * 2/3 is an arbitrary choice; used also in ndis init.c
	 */
	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
		/* free receive buffers we did alloc and then bail out */
		for (i = 0; i < count; i++) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
		return -ENOMEM;
	}

	/* post receive buffers to receive incoming input - without holding
	 * lock - we've not enabled nor started the queue so there shouldn't
	 * be any rcv or xmit activity
	 */
	for (i = 0; i < count; i++) {
		err = post_skb(devdata->cmdrsp_rcv, devdata,
			       devdata->rcvbuf[i]);
		if (err) {
			/* If we posted at least one skb, we should return
			 * success, but need to free the resources that we
			 * have not successfully posted.
			 */
			for (j = i; j < count; j++) {
				kfree_skb(devdata->rcvbuf[j]);
				devdata->rcvbuf[j] = NULL;
			}
			if (i)
				return 0;
			return err;
		}
	}

	return 0;
}
/*
 * visornic_enable_with_timeout - send enable to IO Part
 * @netdev: struct net_device
 * @timeout: Time to wait for the ACK from the enable
 *
 * Sends enable to IOVM, inits, and posts receive buffers to IOVM
 * timeout is defined in msecs (timeout of 0 specifies infinite wait)
 * Return 0 for success, negative for failure.
 */
static int
visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
{
	int err = 0;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;
	int wait = 0;

	napi_enable(&devdata->napi);

	/* NOTE: the other end automatically unposts the rcv buffers when it
	 * gets a disable.
	 */
	err = init_rcv_bufs(netdev, devdata);
	if (err < 0) {
		dev_err(&netdev->dev,
			"%s failed to init rcv bufs\n", __func__);
		return err;
	}

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 1;
	devdata->enab_dis_acked = 0;

	/* now we're ready, let's send an ENB to uisnic but until we get
	 * an ACK back from uisnic, we'll drop the packets
	 */
	devdata->n_rcv_packets_not_accepted = 0;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send enable and wait for ack -- don't hold lock when sending enable
	 * because if the queue is full, insert might sleep. If an error
	 * occurs, don't wait for the timeout.
	 */
	err = send_enbdis(netdev, 1, devdata);
	if (err)
		return err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	if (!devdata->enab_dis_acked) {
		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
		return -EIO;
	}

	netif_start_queue(netdev);
	return 0;
}
/*
 * visornic_timeout_reset - handle xmit timeout resets
 * @work: work item that scheduled the work
 *
 * Transmit timeouts are typically handled by resetting the
 * device. For our virtual NIC we will send a disable and enable
 * to the IOVM. If it doesn't respond, we will trigger a serverdown.
 */
static void
visornic_timeout_reset(struct work_struct *work)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	int response = 0;

	devdata = container_of(work, struct visornic_devdata, timeout_reset);
	netdev = devdata->netdev;

	rtnl_lock();
	if (!netif_running(netdev)) {
		rtnl_unlock();
		return;
	}

	response = visornic_disable_with_timeout(netdev,
						 VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	response = visornic_enable_with_timeout(netdev,
						VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	rtnl_unlock();
	return;

call_serverdown:
	visornic_serverdown(devdata, NULL);
	rtnl_unlock();
}
/*
 * visornic_open - Enable the visornic device and mark the queue started
 * @netdev: netdevice to start
 *
 * Enable the device and start the transmit queue.
 * Return 0 for success
 */
static int
visornic_open(struct net_device *netdev)
{
	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
	return 0;
}
/*
 * visornic_close - Disables the visornic device and stops the queues
 * @netdev: netdevice to stop
 *
 * Disable the device and stop the transmit queue.
 * Return 0 for success
 */
static int
visornic_close(struct net_device *netdev)
{
	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
	return 0;
}
/*
 * devdata_xmits_outstanding - compute outstanding xmits
 * @devdata: visornic_devdata for device
 *
 * Return value is the number of outstanding xmits.
 */
static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
{
	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
		return devdata->chstat.sent_xmit -
			devdata->chstat.got_xmit_done;
	return (ULONG_MAX - devdata->chstat.got_xmit_done
		+ devdata->chstat.sent_xmit + 1);
}
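
/* Example of the wrap handling above: if sent_xmit has wrapped around to 2
 * while got_xmit_done is still ULONG_MAX - 1, the second branch computes
 * (ULONG_MAX - (ULONG_MAX - 1)) + 2 + 1 = 4 outstanding xmits, so the
 * counter wrap is harmless.
 */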
/*
 * vnic_hit_high_watermark
 * @devdata: indicates visornic device we are checking
 * @high_watermark: max num of unacked xmits we will tolerate
 *                  before we will start throttling
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is >= high_watermark.
 */
static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
				    ulong high_watermark)
{
	return (devdata_xmits_outstanding(devdata) >= high_watermark);
}
/*
 * vnic_hit_low_watermark
 * @devdata: indicates visornic device we are checking
 * @low_watermark: we will wait until the num of unacked xmits
 *                 drops to this value or lower before we start
 *                 xmitting again
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is <= low_watermark.
 */
static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
				   ulong low_watermark)
{
	return (devdata_xmits_outstanding(devdata) <= low_watermark);
}
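
/* Together these two helpers implement hysteresis-style flow control:
 * visornic_xmit() stops the queue once outstanding xmits reach
 * upper_threshold_net_xmits, and service_resp_queue() wakes it again only
 * after completions drain the count down to lower_threshold_net_xmits.
 */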
/*
 * visornic_xmit - send a packet to the IO Partition
 * @skb: Packet to be sent
 * @netdev: net device the packet is being sent from
 *
 * Convert the skb to a cmdrsp so the IO Partition can understand it.
 * Send the XMIT command to the IO Partition for processing. This
 * function is protected from concurrent calls by a spinlock xmit_lock
 * in the net_device struct, but as soon as the function returns it
 * can be called again.
 * Returns NETDEV_TX_OK.
 */
static int
visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct visornic_devdata *devdata;
	int len, firstfraglen, padlen;
	struct uiscmdrsp *cmdrsp = NULL;
	unsigned long flags;
	int err;

	devdata = netdev_priv(netdev);
	spin_lock_irqsave(&devdata->priv_lock, flags);

	if (netif_queue_stopped(netdev) || devdata->server_down ||
	    devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - queue stopped\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* sk_buff struct is used to host network data throughout all the
	 * linux network subsystems
	 */
	len = skb->len;

	/* skb->len is the FULL length of data (including fragmentary portion)
	 * skb->data_len is the length of the fragment portion in frags
	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
	 * calculate the length of the first fragment that skb->data is
	 * pointing to
	 */
	firstfraglen = skb->len - skb->data_len;
	if (firstfraglen < ETH_HLEN) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - first frag too small (%d)\n",
			__func__, firstfraglen);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if ((len < ETH_MIN_PACKET_SIZE) &&
	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
		/* pad the packet out to minimum size */
		padlen = ETH_MIN_PACKET_SIZE - len;
		memset(&skb->data[len], 0, padlen);
		len += padlen;
		firstfraglen += padlen;
	}

	cmdrsp = devdata->xmit_cmdrsp;
	/* clear cmdrsp */
	memset(cmdrsp, 0, SIZEOF_CMDRSP);
	cmdrsp->net.type = NET_XMIT;
	cmdrsp->cmdtype = CMD_NET_TYPE;

	/* save the pointer to skb -- we'll need it for completion */
	cmdrsp->net.buf = skb;

	if (vnic_hit_high_watermark(devdata,
				    devdata->max_outstanding_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		devdata->chstat.reject_count++;
		if (!devdata->queuefullmsg_logged &&
		    ((devdata->chstat.reject_count & 0x3ff) == 1))
			devdata->queuefullmsg_logged = 1;
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - waiting for iovm to catch up\n",
			__func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	if (devdata->queuefullmsg_logged)
		devdata->queuefullmsg_logged = 0;

	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		cmdrsp->net.xmt.lincsum.valid = 1;
		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
		if (skb_transport_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.hrawoff =
				skb_transport_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
		}
		if (skb_network_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.nhrawoff =
				skb_network_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
		}
		cmdrsp->net.xmt.lincsum.csum = skb->csum;
	} else {
		cmdrsp->net.xmt.lincsum.valid = 0;
	}

	/* save off the length of the entire data packet */
	cmdrsp->net.xmt.len = len;

	/* copy ethernet header from first frag into cmdrsp
	 * - everything else will be passed in frags & DMA'ed
	 */
	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
	/* copy frags info - from skb->data we need to only provide access
	 * beyond eth header
	 */
	cmdrsp->net.xmt.num_frags =
		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
					      MAX_PHYS_INFO,
					      cmdrsp->net.xmt.frags);
	if (cmdrsp->net.xmt.num_frags < 0) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - copy frags failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART, cmdrsp);
	if (err) {
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - signalinsert failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* Track the skbs that have been sent to the IOVM for XMIT */
	skb_queue_head(&devdata->xmitbufhead, skb);

	/* update xmt stats */
	devdata->net_stats.tx_packets++;
	devdata->net_stats.tx_bytes += skb->len;
	devdata->chstat.sent_xmit++;

	/* check if we have hit the high watermark for netif_stop_queue() */
	if (vnic_hit_high_watermark(devdata,
				    devdata->upper_threshold_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		/* stop queue - call netif_wake_queue() after lower threshold */
		netif_stop_queue(netdev);
		dev_dbg(&netdev->dev,
			"%s busy - invoking iovm flow control\n",
			__func__);
		devdata->flow_control_upper_hits++;
	}
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* skb will be freed when we get back NET_XMIT_DONE */
	return NETDEV_TX_OK;
}
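
/* Note that visornic_xmit() always returns NETDEV_TX_OK: on every busy or
 * error path it frees the skb and drops the packet itself rather than
 * returning NETDEV_TX_BUSY, so the stack never requeues to this driver.
 */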
/*
 * visornic_get_stats - returns net_stats of the visornic device
 * @netdev: netdevice
 *
 * Returns the net_device_stats for the device
 */
static struct net_device_stats *
visornic_get_stats(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);

	return &devdata->net_stats;
}
/*
 * visornic_change_mtu - changes mtu of device.
 * @netdev: netdevice
 * @new_mtu: value of new mtu
 *
 * MTU cannot be changed by system; it must be changed via a
 * CONTROLVM message. All vnics and pnics in a switch have
 * to have the same MTU for everything to work.
 * Currently not supported.
 */
static int
visornic_change_mtu(struct net_device *netdev, int new_mtu)
{
	return -EINVAL;
}
/*
 * visornic_set_multi - set visornic device flags
 * @netdev: netdevice
 *
 * The only flag we currently support is IFF_PROMISC
 */
static void
visornic_set_multi(struct net_device *netdev)
{
	struct uiscmdrsp *cmdrsp;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int err = 0;

	if (devdata->old_flags == netdev->flags)
		return;

	if ((netdev->flags & IFF_PROMISC) ==
	    (devdata->old_flags & IFF_PROMISC))
		goto out_save_flags;

	devdata->chstat.sent_promisc++;
	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!cmdrsp)
		return;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	cmdrsp->net.type = NET_RCV_PROMISC;
	cmdrsp->net.enbdis.context = netdev;
	cmdrsp->net.enbdis.enable =
		netdev->flags & IFF_PROMISC;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART,
					cmdrsp);
	kfree(cmdrsp);
	if (err)
		return;

out_save_flags:
	devdata->old_flags = netdev->flags;
}
/*
 * visornic_xmit_timeout - request to timeout the xmit
 * @netdev: netdevice
 *
 * Queue the work and return. Make sure we have not already
 * been informed that the IO Partition is gone; if it is gone
 * we will already timeout the xmits.
 */
static void
visornic_xmit_timeout(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		return;
	}

	/* Ensure that a ServerDown message hasn't been received */
	if (!devdata->enabled ||
	    (devdata->server_down && !devdata->server_change_state)) {
		dev_dbg(&netdev->dev, "%s no processing\n",
			__func__);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		return;
	}
	schedule_work(&devdata->timeout_reset);
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/*
 * repost_return - repost rcv bufs that have come back
 * @cmdrsp: io channel command struct to post
 * @devdata: visornic devdata for the device
 * @skb: skb that was returned to us
 * @netdev: netdevice
 *
 * Repost rcv buffers that have been returned to us when
 * we are finished with them.
 * Returns 0 for success, negative error code for error.
 */
static int
repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
	      struct sk_buff *skb, struct net_device *netdev)
{
	struct net_pkt_rcv copy;
	int i = 0, cc, numreposted;
	int found_skb = 0;
	int status = 0;

	copy = cmdrsp->net.rcv;
	switch (copy.numrcvbufs) {
	case 0:
		devdata->n_rcv0++;
		break;
	case 1:
		devdata->n_rcv1++;
		break;
	case 2:
		devdata->n_rcv2++;
		break;
	default:
		devdata->n_rcvx++;
		break;
	}
	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
				continue;

			if ((skb) && devdata->rcvbuf[i] == skb) {
				devdata->found_repost_rcvbuf_cnt++;
				found_skb = 1;
				devdata->repost_found_skb_cnt++;
			}
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->num_rcv_bufs_could_not_alloc++;
				devdata->alloc_failed_in_repost_rtn_cnt++;
				status = -ENOMEM;
				break;
			}
			status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			if (status) {
				kfree_skb(devdata->rcvbuf[i]);
				devdata->rcvbuf[i] = NULL;
				break;
			}
			numreposted++;
			break;
		}
	}
	if (numreposted != copy.numrcvbufs) {
		devdata->n_repost_deficit++;
		status = -EINVAL;
	}
	if (skb) {
		if (found_skb) {
			kfree_skb(skb);
		} else {
			status = -EINVAL;
			devdata->bad_rcv_buf++;
		}
	}
	return status;
}
/*
 * visornic_rx - Handle receive packets coming back from IO Part
 * @cmdrsp: Receive packet returned from IO Part
 *
 * Got a receive packet back from the IO Part, handle it and send
 * it up the stack.
 * Returns 1 iff an skb was received, otherwise 0
 */
static int
visornic_rx(struct uiscmdrsp *cmdrsp)
{
	struct visornic_devdata *devdata;
	struct sk_buff *skb, *prev, *curr;
	struct net_device *netdev;
	int cc, currsize, off;
	struct ethhdr *eth;
	unsigned long flags;

	/* post new rcv buf to the other end using the cmdrsp we have at hand
	 * post it without holding lock - but we'll use the signal lock to
	 * synchronize the queue insert; the cmdrsp that contains the net.rcv
	 * is the one we are using to repost, so copy the info we need from it.
	 */
	skb = cmdrsp->net.buf;
	netdev = skb->dev;

	devdata = netdev_priv(netdev);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	atomic_dec(&devdata->num_rcvbuf_in_iovm);

	/* set length to how much was ACTUALLY received -
	 * NOTE: rcv_done_len includes actual length of data rcvd
	 * including ethhdr
	 */
	skb->len = cmdrsp->net.rcv.rcv_done_len;

	/* update rcv stats - call it with priv_lock held */
	devdata->net_stats.rx_packets++;
	devdata->net_stats.rx_bytes += skb->len;

	/* test enabled while holding lock */
	if (!(devdata->enabled && devdata->enab_dis_acked)) {
		/* don't process it unless we're in enable mode and until
		 * we've gotten an ACK saying the other end got our RCV enable
		 */
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* when skb was allocated, skb->dev, skb->data, skb->len and
	 * skb->data_len were setup. AND, data has already been put into the
	 * skb (both first frag and in frags pages)
	 * NOTE: firstfragslen is the amount of data in skb->data and that
	 * which is not in nr_frags or frag_list. This is now simply
	 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
	 * firstfrag & set data_len to show the rest; see if we have to chain
	 * frag_list.
	 */
	if (skb->len > RCVPOST_BUF_SIZE) {	/* do PRECAUTIONARY check */
		if (cmdrsp->net.rcv.numrcvbufs < 2) {
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		/* length rcvd is greater than firstfrag in this skb rcv buf */
		skb->tail += RCVPOST_BUF_SIZE;	/* amount in skb->data */
		skb->data_len = skb->len - RCVPOST_BUF_SIZE; /* amount that
							      * will be in
							      * frag_list
							      */
	} else {
		/* data fits in this skb - no chaining - do
		 * PRECAUTIONARY check
		 */
		if (cmdrsp->net.rcv.numrcvbufs != 1) {	/* should be 1 */
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		skb->tail += skb->len;
		skb->data_len = 0;	/* nothing rcvd in frag_list */
	}
	off = skb_tail_pointer(skb) - skb->data;

	/* amount we bumped tail by in the head skb
	 * it is used to calculate the size of each chained skb below
	 * it is also used to index into bufline to continue the copy
	 * (for chansocktwopc)
	 * if necessary chain the rcv skbs together.
	 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
	 * chain the rest to that one.
	 * - do PRECAUTIONARY check
	 */
	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
			dev_err(&devdata->netdev->dev, "repost_return failed");
		return 0;
	}

	if (cmdrsp->net.rcv.numrcvbufs > 1) {
		/* chain the various rcv buffers into the skb's frag_list. */
		/* Note: off was initialized above */
		for (cc = 1, prev = NULL;
		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
			curr->next = NULL;
			if (!prev)	/* start of list - set head */
				skb_shinfo(skb)->frag_list = curr;
			else
				prev->next = curr;
			prev = curr;

			/* should we set skb->len and skb->data_len for each
			 * buffer being chained??? can't hurt!
			 */
			currsize = min(skb->len - off,
				       (unsigned int)RCVPOST_BUF_SIZE);
			curr->len = currsize;
			curr->tail += currsize;
			off += currsize;
		}
		/* assert skb->len == off */
		if (skb->len != off) {
			netdev_err(devdata->netdev,
				   "something wrong; skb->len:%d != off:%d\n",
				   skb->len, off);
		}
	}

	/* set up packet's protocol type using ethernet header - this
	 * sets up skb->pkt_type & it also PULLS out the eth header
	 */
	skb->protocol = eth_type_trans(skb, netdev);
	eth = eth_hdr(skb);
	skb->csum = 0;
	skb->ip_summed = CHECKSUM_NONE;

	do {
		if (netdev->flags & IFF_PROMISC)
			break;	/* accept all packets */
		if (skb->pkt_type == PACKET_BROADCAST) {
			if (netdev->flags & IFF_BROADCAST)
				break;	/* accept all broadcast packets */
		} else if (skb->pkt_type == PACKET_MULTICAST) {
			if ((netdev->flags & IFF_MULTICAST) &&
			    (netdev_mc_count(netdev))) {
				struct netdev_hw_addr *ha;
				int found_mc = 0;

				/* only accept multicast packets that we can
				 * find in our multicast address list
				 */
				netdev_for_each_mc_addr(ha, netdev) {
					if (ether_addr_equal(eth->h_dest,
							     ha->addr)) {
						found_mc = 1;
						break;
					}
				}
				/* accept pkt, dest matches a multicast addr */
				if (found_mc)
					break;
			}
		/* accept packet, h_dest must match vnic mac address */
		} else if (skb->pkt_type == PACKET_HOST) {
			break;
		} else if (skb->pkt_type == PACKET_OTHERHOST) {
			/* something is not right */
			dev_err(&devdata->netdev->dev,
				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
				netdev->name, eth->h_dest, netdev->dev_addr);
		}
		/* drop packet - don't forward it up to OS */
		devdata->n_rcv_packets_not_accepted++;
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	} while (0);

	netif_receive_skb(skb);
	/* netif_rx returns various values, but "in practice most drivers
	 * ignore the return value"
	 */

	skb = NULL;
	/* whether the packet got dropped or handled, the skb is freed by
	 * kernel code, so we shouldn't free it. but we should repost a
	 * new rcv buffer.
	 */
	repost_return(cmdrsp, devdata, skb, netdev);
	return 1;
}
/*
 * devdata_initialize - Initialize devdata structure
 * @devdata: visornic_devdata structure to initialize
 * @dev: visorbus device it belongs to
 *
 * Setup initial values for the visornic based on channel and default
 * values.
 * Returns a pointer to the devdata structure
 */
static struct visornic_devdata *
devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
{
	devdata->dev = dev;
	devdata->incarnation_id = get_jiffies_64();
	return devdata;
}
/*
 * devdata_release - Frees up references in devdata
 * @devdata: struct to clean up
 *
 * Frees up references in devdata.
 */
static void devdata_release(struct visornic_devdata *devdata)
{
	kfree(devdata->rcvbuf);
	kfree(devdata->cmdrsp_rcv);
	kfree(devdata->xmit_cmdrsp);
}
static const struct net_device_ops visornic_dev_ops = {
	.ndo_open = visornic_open,
	.ndo_stop = visornic_close,
	.ndo_start_xmit = visornic_xmit,
	.ndo_get_stats = visornic_get_stats,
	.ndo_change_mtu = visornic_change_mtu,
	.ndo_tx_timeout = visornic_xmit_timeout,
	.ndo_set_rx_mode = visornic_set_multi,
};
static ssize_t info_debugfs_read(struct file *file, char __user *buf,
				 size_t len, loff_t *offset)
{
	ssize_t bytes_read = 0;
	int str_pos = 0;
	struct visornic_devdata *devdata;
	struct net_device *dev;
	char *vbuf;

	if (len > MAX_BUF)
		len = MAX_BUF;
	vbuf = kzalloc(len, GFP_KERNEL);
	if (!vbuf)
		return -ENOMEM;

	/* for each vnic channel dump out channel specific data */
	rcu_read_lock();
	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
		/* Only consider netdevs that are visornic, and are open */
		if ((dev->netdev_ops != &visornic_dev_ops) ||
		    (!netif_queue_stopped(dev)))
			continue;

		devdata = netdev_priv(dev);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "netdev = %s (0x%p), MAC Addr %pM\n",
				     dev->name,
				     dev,
				     dev->dev_addr);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "VisorNic Dev Info = 0x%p\n", devdata);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcv_bufs = %d\n",
				     devdata->num_rcv_bufs);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " max_outstanding_net_xmits = %lu\n",
				     devdata->max_outstanding_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " upper_threshold_net_xmits = %lu\n",
				     devdata->upper_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " lower_threshold_net_xmits = %lu\n",
				     devdata->lower_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " queuefullmsg_logged = %d\n",
				     devdata->queuefullmsg_logged);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_rcv = %lu\n",
				     devdata->chstat.got_rcv);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_enbdisack = %lu\n",
				     devdata->chstat.got_enbdisack);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_xmit_done = %lu\n",
				     devdata->chstat.got_xmit_done);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.xmit_fail = %lu\n",
				     devdata->chstat.xmit_fail);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_enbdis = %lu\n",
				     devdata->chstat.sent_enbdis);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_promisc = %lu\n",
				     devdata->chstat.sent_promisc);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post = %lu\n",
				     devdata->chstat.sent_post);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post_failed = %lu\n",
				     devdata->chstat.sent_post_failed);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_xmit = %lu\n",
				     devdata->chstat.sent_xmit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.reject_count = %lu\n",
				     devdata->chstat.reject_count);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.extra_rcvbufs_sent = %lu\n",
				     devdata->chstat.extra_rcvbufs_sent);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv0 = %lu\n", devdata->n_rcv0);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv1 = %lu\n", devdata->n_rcv1);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv2 = %lu\n", devdata->n_rcv2);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcvx = %lu\n", devdata->n_rcvx);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcvbuf_in_iovm = %d\n",
				     atomic_read(&devdata->num_rcvbuf_in_iovm));
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_if_needed_cnt = %lu\n",
				     devdata->alloc_failed_in_if_needed_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
				     devdata->alloc_failed_in_repost_rtn_cnt);
		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
		 *		     " inner_loop_limit_reached_cnt = %lu\n",
		 *		     devdata->inner_loop_limit_reached_cnt);
		 */
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " found_repost_rcvbuf_cnt = %lu\n",
				     devdata->found_repost_rcvbuf_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " repost_found_skb_cnt = %lu\n",
				     devdata->repost_found_skb_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_repost_deficit = %lu\n",
				     devdata->n_repost_deficit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " bad_rcv_buf = %lu\n",
				     devdata->bad_rcv_buf);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv_packets_not_accepted = %lu\n",
				     devdata->n_rcv_packets_not_accepted);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_rcvd = %llu\n",
				     devdata->interrupts_rcvd);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_notme = %llu\n",
				     devdata->interrupts_notme);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_disabled = %llu\n",
				     devdata->interrupts_disabled);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " busy_cnt = %llu\n",
				     devdata->busy_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_upper_hits = %llu\n",
				     devdata->flow_control_upper_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_lower_hits = %llu\n",
				     devdata->flow_control_lower_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " netif_queue = %s\n",
				     netif_queue_stopped(devdata->netdev) ?
				     "stopped" : "running");
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " xmits_outstanding = %lu\n",
				     devdata_xmits_outstanding(devdata));
	}
	rcu_read_unlock();
	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
	kfree(vbuf);
	return bytes_read;
}
static struct dentry *visornic_debugfs_dir;
static const struct file_operations debugfs_info_fops = {
	.read = info_debugfs_read,
};
/*
 * send_rcv_posts_if_needed
 * @devdata: visornic device
 *
 * Send receive buffers to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
send_rcv_posts_if_needed(struct visornic_devdata *devdata)
{
	struct net_device *netdev;
	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
	int i;
	int err;

	/* don't do this until vnic is marked ready */
	if (!(devdata->enabled && devdata->enab_dis_acked))
		return 0;

	netdev = devdata->netdev;
	rcv_bufs_allocated = 0;
	/* this code is trying to prevent getting stuck here forever,
	 * but still retry it if you can't allocate them all this time.
	 */
	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
	while (cur_num_rcv_bufs_to_alloc > 0) {
		cur_num_rcv_bufs_to_alloc--;
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i])
				continue;
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->alloc_failed_in_if_needed_cnt++;
				break;
			}
			rcv_bufs_allocated++;
			err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			if (err) {
				kfree_skb(devdata->rcvbuf[i]);
				devdata->rcvbuf[i] = NULL;
				break;
			}
			devdata->chstat.extra_rcvbufs_sent++;
		}
	}
	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
	return 0;
}
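
/* Each allocation failure recorded in num_rcv_bufs_could_not_alloc earns
 * one retry pass through the loop above on a later poll, so a transient
 * GFP_ATOMIC failure heals itself the next time visornic_poll() runs.
 */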
/*
 * drain_resp_queue - drains and ignores all messages from the resp queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 */
static void
drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
{
	while (!visorchannel_signalremove(devdata->dev->visorchannel,
					  IOCHAN_FROM_IOPART,
					  cmdrsp))
		;
}
/*
 * service_resp_queue - drains the response queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 *
 * Drain the response queue of any responses from the IO partition.
 * Process the responses as we get them.
 * Returns when response queue is empty or when the thread stops.
 */
static void
service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
		   int *rx_work_done, int budget)
{
	unsigned long flags;
	struct net_device *netdev;

	while (*rx_work_done < budget) {
		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
		 * moment
		 */
		if (visorchannel_signalremove(devdata->dev->visorchannel,
					      IOCHAN_FROM_IOPART,
					      cmdrsp))
			break; /* queue empty */

		switch (cmdrsp->net.type) {
		case NET_RCV:
			devdata->chstat.got_rcv++;
			/* process incoming packet */
			*rx_work_done += visornic_rx(cmdrsp);
			break;
		case NET_XMIT_DONE:
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->chstat.got_xmit_done++;
			if (cmdrsp->net.xmtdone.xmt_done_result)
				devdata->chstat.xmit_fail++;
			/* only call queue wake if we stopped it */
			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
			/* ASSERT netdev == vnicinfo->netdev; */
			if ((netdev == devdata->netdev) &&
			    netif_queue_stopped(netdev)) {
				/* check if we have crossed the lower watermark
				 * for netif_wake_queue()
				 */
				if (vnic_hit_low_watermark
				    (devdata,
				     devdata->lower_threshold_net_xmits)) {
					/* enough NET_XMITs completed
					 * so can restart netif queue
					 */
					netif_wake_queue(netdev);
					devdata->flow_control_lower_hits++;
				}
			}
			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			kfree_skb(cmdrsp->net.buf);
			break;
		case NET_RCV_ENBDIS_ACK:
			devdata->chstat.got_enbdisack++;
			netdev = (struct net_device *)
				 cmdrsp->net.enbdis.context;
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->enab_dis_acked = 1;
			spin_unlock_irqrestore(&devdata->priv_lock, flags);

			if (devdata->server_down &&
			    devdata->server_change_state) {
				/* Inform Linux that the link is up */
				devdata->server_down = false;
				devdata->server_change_state = false;
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			}
			break;
		case NET_CONNECT_STATUS:
			netdev = devdata->netdev;
			if (cmdrsp->net.enbdis.enable == 1) {
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			} else {
				netif_stop_queue(netdev);
				netif_carrier_off(netdev);
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
			}
			break;
		default:
			break;
		}
		/* cmdrsp is now available for reuse */
	}
}
static int visornic_poll(struct napi_struct *napi, int budget)
{
	struct visornic_devdata *devdata = container_of(napi,
							struct visornic_devdata,
							napi);
	int rx_count = 0;
	int err;

	err = send_rcv_posts_if_needed(devdata);
	if (err)
		return rx_count;

	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);

	/* If there aren't any more packets to receive stop the poll */
	if (rx_count < budget)
		napi_complete_done(napi, rx_count);

	return rx_count;
}
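
/* Completing with rx_count < budget tells NAPI we are done polling; the
 * irq_poll_timer then re-schedules us, standing in for the interrupt
 * re-enable that a non-polling driver would do at this point.
 */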
/*
 * poll_for_irq - Checks the status of the response queue.
 * @v: void pointer to the visornic devdata
 *
 * Main function of the vnic_incoming thread. Periodically check the
 * response queue and drain it if needed.
 * Returns when thread has stopped.
 */
static void
poll_for_irq(unsigned long v)
{
	struct visornic_devdata *devdata = (struct visornic_devdata *)v;

	if (!visorchannel_signalempty(
				   devdata->dev->visorchannel,
				   IOCHAN_FROM_IOPART))
		napi_schedule(&devdata->napi);

	atomic_set(&devdata->interrupt_rcvd, 0);

	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
}
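
/* The channel is marked ULTRA_IO_CHANNEL_IS_POLLING in visornic_probe(),
 * so instead of a hardware interrupt this 2 ms timer checks the response
 * queue and schedules NAPI whenever work is pending.
 */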
/*
 * visornic_probe - probe function for visornic devices
 * @dev: The visor device discovered
 *
 * Called when visorbus discovers a visornic device on its
 * bus. It creates a new visornic ethernet adapter.
 * Returns 0 or negative for error.
 */
static int visornic_probe(struct visor_device *dev)
{
	struct visornic_devdata *devdata = NULL;
	struct net_device *netdev = NULL;
	int err = 0;
	int channel_offset = 0;
	u64 features;

	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
	if (!netdev) {
		dev_err(&dev->device,
			"%s alloc_etherdev failed\n", __func__);
		return -ENOMEM;
	}

	netdev->netdev_ops = &visornic_dev_ops;
	netdev->watchdog_timeo = 5 * HZ;
	SET_NETDEV_DEV(netdev, &dev->device);

	/* Get MAC address from channel and read it into the device. */
	netdev->addr_len = ETH_ALEN;
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.macaddr);
	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
				    ETH_ALEN);
	if (err < 0) {
		dev_err(&dev->device,
			"%s failed to get mac addr from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata = devdata_initialize(netdev_priv(netdev), dev);
	if (!devdata) {
		dev_err(&dev->device,
			"%s devdata_initialize failed\n", __func__);
		err = -ENOMEM;
		goto cleanup_netdev;
	}
	/* don't trust messages laying around in the channel */
	drain_resp_queue(devdata->cmdrsp, devdata);

	devdata->netdev = netdev;
	dev_set_drvdata(&dev->device, devdata);
	init_waitqueue_head(&devdata->rsp_queue);
	spin_lock_init(&devdata->priv_lock);
	devdata->enabled = 0; /* not yet */
	atomic_set(&devdata->usage, 1);

	/* Setup rcv bufs */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.num_rcv_bufs);
	err = visorbus_read_channel(dev, channel_offset,
				    &devdata->num_rcv_bufs, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get #rcv bufs from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
				  sizeof(struct sk_buff *), GFP_KERNEL);
	if (!devdata->rcvbuf) {
		err = -ENOMEM;
		goto cleanup_netdev;
	}

	/* set the net_xmit outstanding threshold */
	/* always leave two slots open but you should have 3 at a minimum */
	/* note that max_outstanding_net_xmits must be > 0 */
	devdata->max_outstanding_net_xmits =
		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
	devdata->upper_threshold_net_xmits =
		max_t(unsigned long,
		      2, (devdata->max_outstanding_net_xmits - 1));
	devdata->lower_threshold_net_xmits =
		max_t(unsigned long,
		      1, (devdata->max_outstanding_net_xmits / 2));
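
	/* For example, with num_rcv_bufs = 60 (a hypothetical count, purely
	 * to illustrate the arithmetic) this yields
	 * max_outstanding_net_xmits = 18, upper_threshold_net_xmits = 17
	 * and lower_threshold_net_xmits = 9.
	 */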

	skb_queue_head_init(&devdata->xmitbufhead);

	/* create a cmdrsp we can use to post and unpost rcv buffers */
	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->cmdrsp_rcv) {
		err = -ENOMEM;
		goto cleanup_rcvbuf;
	}
	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->xmit_cmdrsp) {
		err = -ENOMEM;
		goto cleanup_cmdrsp_rcv;
	}
	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
	devdata->server_down = false;
	devdata->server_change_state = false;

	/* set the default mtu */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.mtu);
	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get mtu from chan (%d)\n",
			__func__, err);
		goto cleanup_xmit_cmdrsp;
	}

	/* TODO: Setup Interrupt information */
	/* Let's start our threads to get responses */
	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);

	setup_timer(&devdata->irq_poll_timer, poll_for_irq,
		    (unsigned long)devdata);
	/* Note: This timer has to start running before the while
	 * loop below because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));

	channel_offset = offsetof(struct spar_io_channel_protocol,
				  channel_header.features);
	err = visorbus_read_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get features from chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	features |= ULTRA_IO_CHANNEL_IS_POLLING;
	features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
	err = visorbus_write_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to set features in chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	/* Note: Interrupts have to be enabled before the while
	 * loop below because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	visorbus_enable_channel_interrupts(dev);

	err = register_netdev(netdev);
	if (err) {
		dev_err(&dev->device,
			"%s register_netdev failed (%d)\n", __func__, err);
		goto cleanup_napi_add;
	}

	/* create debug/sysfs directories */
	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
						      visornic_debugfs_dir);
	if (!devdata->eth_debugfs_dir) {
		dev_err(&dev->device,
			"%s debugfs_create_dir %s failed\n",
			__func__, netdev->name);
		err = -ENOMEM;
		goto cleanup_register_netdev;
	}

	dev_info(&dev->device, "%s success netdev=%s\n",
		 __func__, netdev->name);
	return 0;

cleanup_register_netdev:
	unregister_netdev(netdev);

cleanup_napi_add:
	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

cleanup_xmit_cmdrsp:
	kfree(devdata->xmit_cmdrsp);

cleanup_cmdrsp_rcv:
	kfree(devdata->cmdrsp_rcv);

cleanup_rcvbuf:
	kfree(devdata->rcvbuf);

cleanup_netdev:
	free_netdev(netdev);
	return err;
}
/*
 * host_side_disappeared - IO part is gone.
 * @devdata: device object
 *
 * IO partition servicing this device is gone; do cleanup.
 */
static void host_side_disappeared(struct visornic_devdata *devdata)
{
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->dev = NULL;	/* indicate device destroyed */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/*
 * visornic_remove - Called when visornic dev goes away
 * @dev: visornic device that is being removed
 *
 * Called when DEVICE_DESTROY gets called to remove device.
 */
static void visornic_remove(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
	struct net_device *netdev;
	unsigned long flags;

	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return;
	}
	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s already being removed\n", __func__);
		return;
	}
	devdata->going_away = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	netdev = devdata->netdev;
	if (!netdev) {
		dev_err(&dev->device, "%s no net device\n", __func__);
		return;
	}

	/* going_away prevents new items being added to the workqueues */
	cancel_work_sync(&devdata->timeout_reset);

	debugfs_remove_recursive(devdata->eth_debugfs_dir);

	unregister_netdev(netdev); /* this will call visornic_close() */

	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

	dev_set_drvdata(&dev->device, NULL);
	host_side_disappeared(devdata);
	devdata_release(devdata);
	free_netdev(netdev);
}
/*
 * visornic_pause - Called when IO Part disappears
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished.
 *
 * Called when the IO Partition has gone down. Need to free
 * up resources and wait for IO partition to come back. Mark
 * link as down and don't attempt any DMA. When we have freed
 * memory call the complete_func so that Command knows we are
 * done. If we don't call complete_func, IO part will never
 * recover.
 * Returns 0 for success.
 */
static int visornic_pause(struct visor_device *dev,
			  visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	visornic_serverdown(devdata, complete_func);
	return 0;
}
/*
 * visornic_resume - Called when IO part has recovered
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished
 *
 * Called when the IO partition has recovered. Reestablish
 * connection to the IO part and set the link up. Okay to do
 * anything now.
 * Returns 0 for success.
 */
static int visornic_resume(struct visor_device *dev,
			   visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	unsigned long flags;

	devdata = dev_get_drvdata(&dev->device);
	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return -EINVAL;
	}
	netdev = devdata->netdev;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server already changing state\n",
			__func__);
		return -EINVAL;
	}
	if (!devdata->server_down) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server not down\n", __func__);
		complete_func(dev, 0);
		return 0;
	}
	devdata->server_change_state = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* Must transition channel to ATTACHED state BEFORE
	 * we can start using the device again.
	 * TODO: State transitions
	 */
	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));

	rtnl_lock();
	dev_open(netdev);
	rtnl_unlock();

	complete_func(dev, 0);
	return 0;
}
/* This is used to tell the visorbus driver which types of visor devices
 * we support, and what functions to call when a visor device that we support
 * is attached or removed.
 */
static struct visor_driver visornic_driver = {
	.name = "visornic",
	.owner = THIS_MODULE,
	.channel_types = visornic_channel_types,
	.probe = visornic_probe,
	.remove = visornic_remove,
	.pause = visornic_pause,
	.resume = visornic_resume,
	.channel_interrupt = NULL,
};
/*
 * visornic_init - Init function
 *
 * Init function for the visornic driver. Do initial driver setup
 * and wait for devices.
 * Returns 0 for success, negative for error.
 */
static int visornic_init(void)
{
	struct dentry *ret;
	int err = -ENOMEM;

	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
	if (!visornic_debugfs_dir)
		return err;

	ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
				  &debugfs_info_fops);
	if (!ret)
		goto cleanup_debugfs;
	ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
				  NULL, &debugfs_enable_ints_fops);
	if (!ret)
		goto cleanup_debugfs;

	err = visorbus_register_visor_driver(&visornic_driver);
	if (err)
		goto cleanup_debugfs;

	return 0;

cleanup_debugfs:
	debugfs_remove_recursive(visornic_debugfs_dir);
	return err;
}
/*
 * visornic_cleanup - driver exit routine
 *
 * Unregister driver from the bus and free up memory.
 */
static void visornic_cleanup(void)
{
	visorbus_unregister_visor_driver(&visornic_driver);

	debugfs_remove_recursive(visornic_debugfs_dir);
}
module_init(visornic_init);
module_exit(visornic_cleanup);

MODULE_AUTHOR("Unisys");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");