/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 */
/* This driver lives in a spar partition, and registers to handle ethernet
 * IO channels from the visorbus driver.  It creates netdev devices,
 * forwards transmits to the IO Partition via the IO channel, and accepts
 * receives from the IO Partition via the same channel.
 */
#include <linux/debugfs.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/kthread.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include "visorbus.h"
#include "iochannel.h"
#define VISORNIC_INFINITE_RSP_WAIT 0	/* 0 = wait forever for a response */
/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
 *         = 163840 bytes
 */
#define MAX_BUF 163840
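
/* Per-poll packet budget for NAPI: visornic_poll() processes at most this
 * many receive responses per invocation (64 is the conventional default
 * NAPI weight).
 */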
#define NAPI_WEIGHT 64
/* GUIDs for the channel types supported by this driver. */
static struct visor_channeltype_descriptor visornic_channel_types[] = {
	/* Note that the only channel type we expect to be reported by the
	 * bus driver is the SPAR_VNIC channel.
	 */
	{ SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
	{ NULL_UUID_LE, NULL }
};
MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
/* FIXME XXX: This next line of code must be fixed and removed before
 * acceptance into the 'normal' part of the kernel.  It is only here as a
 * placeholder to get module autoloading functionality working for
 * visorbus.  Code must be added to scripts/mod/file2alias.c, etc., to get
 * this working properly.
 */
MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
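
/* Per-device channel activity counters; one instance is embedded in each
 * visornic_devdata and dumped through the "info" debugfs file below.
 */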
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	unsigned long sent_post;
	unsigned long sent_post_failed;
	unsigned long sent_xmit;
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};
struct visornic_devdata {
	/* 0 = disabled, 1 = enabled to receive */
	unsigned short enabled;
	/* NET_RCV_ENABLE/DISABLE acked by IOPART */
	unsigned short enab_dis_acked;

	struct visor_device *dev;
	struct net_device *netdev;
	struct net_device_stats net_stats;
	atomic_t interrupt_rcvd;
	wait_queue_head_t rsp_queue;
	struct sk_buff **rcvbuf;
	/* incarnation_id lets IOPART know about re-birth */
	u64 incarnation_id;
	/* flags as they were prior to set_multicast_list */
	unsigned short old_flags;
	atomic_t usage;	/* count of users */

	/* number of rcv buffers the vnic will post */
	int num_rcv_bufs;
	int num_rcv_bufs_could_not_alloc;
	atomic_t num_rcvbuf_in_iovm;
	unsigned long alloc_failed_in_if_needed_cnt;
	unsigned long alloc_failed_in_repost_rtn_cnt;

	/* absolute max number of outstanding xmits - should never hit this */
	unsigned long max_outstanding_net_xmits;
	/* high water mark for calling netif_stop_queue() */
	unsigned long upper_threshold_net_xmits;
	/* low water mark for calling netif_wake_queue() */
	unsigned long lower_threshold_net_xmits;
	/* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
	struct sk_buff_head xmitbufhead;

	visorbus_state_complete_func server_down_complete_func;
	struct work_struct timeout_reset;
	/* cmdrsp_rcv is used for posting/unposting rcv buffers */
	struct uiscmdrsp *cmdrsp_rcv;
	/* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
	struct uiscmdrsp *xmit_cmdrsp;

	bool server_down;		/* IOPART is down */
	bool server_change_state;	/* Processing SERVER_CHANGESTATE msg */
	bool going_away;		/* device is being torn down */
	struct dentry *eth_debugfs_dir;
	u64 interrupts_rcvd;
	u64 interrupts_notme;
	u64 interrupts_disabled;
	u64 busy_cnt;
	spinlock_t priv_lock;	/* spinlock to access devdata structures */

	/* flow control counter */
	u64 flow_control_upper_hits;
	u64 flow_control_lower_hits;

	/* debug counters */
	unsigned long n_rcv0;			/* # rcvs of 0 buffers */
	unsigned long n_rcv1;			/* # rcvs of 1 buffers */
	unsigned long n_rcv2;			/* # rcvs of 2 buffers */
	unsigned long n_rcvx;			/* # rcvs of >2 buffers */
	unsigned long found_repost_rcvbuf_cnt;	/* # repost_rcvbuf_cnt */
	unsigned long repost_found_skb_cnt;	/* # of found the skb */
	unsigned long n_repost_deficit;		/* # of lost rcv buffers */
	unsigned long bad_rcv_buf;	/* # of unknown rcv skb not freed */
	unsigned long n_rcv_packets_not_accepted; /* # bogus rcv packets */

	int queuefullmsg_logged;
	struct chanstat chstat;
	struct timer_list irq_poll_timer;
	struct napi_struct napi;
	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
};
/* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
static u16
add_physinfo_entries(u64 inp_pfn, u16 inp_off, u32 inp_len, u16 index,
		     u16 max_pi_arr_entries, struct phys_info pi_arr[])
{
	u32 len;
	u16 i, firstlen;

	firstlen = PI_PAGE_SIZE - inp_off;
	if (inp_len <= firstlen) {
		/* The input entry spans only one page - add as is. */
		if (index >= max_pi_arr_entries)
			return 0;
		pi_arr[index].pi_pfn = inp_pfn;
		pi_arr[index].pi_off = (u16)inp_off;
		pi_arr[index].pi_len = (u16)inp_len;
		return index + 1;
	}

	/* This entry spans multiple pages. */
	for (len = inp_len, i = 0; len;
	     len -= pi_arr[index + i].pi_len, i++) {
		if (index + i >= max_pi_arr_entries)
			return 0;
		pi_arr[index + i].pi_pfn = inp_pfn + i;
		if (i == 0) {
			pi_arr[index].pi_off = inp_off;
			pi_arr[index].pi_len = firstlen;
		} else {
			pi_arr[index + i].pi_off = 0;
			pi_arr[index + i].pi_len =
			    (u16)MINNUM(len, (u32)PI_PAGE_SIZE);
		}
	}
	return index + i;
}
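
/* Worked example (assuming PI_PAGE_SIZE is 4096): an input fragment with
 * inp_len = 6000 and inp_off = 96 is split into firstlen = 4096 - 96 =
 * 4000 bytes on the first page, then MINNUM(2000, 4096) = 2000 bytes at
 * offset 0 on the next pfn, consuming two pi_arr entries.
 */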
/*
 * visor_copy_fragsinfo_from_skb(
 * @skb: skbuff that we are pulling the frags from
 * @firstfraglen: length of first fragment in skb
 * @frags_max: max len of frags array
 * @frags: frags array filled in on output
 *
 * Copy the fragment list in the SKB to a phys_info
 * array that the IOPART understands.
 * Return value indicates number of entries filled in frags
 * Negative values indicate an error.
 */
static int
visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
			      unsigned int frags_max,
			      struct phys_info frags[])
{
	unsigned int count = 0, frag, size, offset = 0, numfrags;
	unsigned int total_count;

	numfrags = skb_shinfo(skb)->nr_frags;
	/* Compute the number of fragments this skb has, and if it's more
	 * than the frag array can hold, linearize the skb.
	 */
	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
	if (firstfraglen % PI_PAGE_SIZE)
		total_count++;

	if (total_count > frags_max) {
		if (skb_linearize(skb))
			return -EINVAL;
		numfrags = skb_shinfo(skb)->nr_frags;
	}
	while (firstfraglen) {
		if (count == frags_max)
			return -EINVAL;

		frags[count].pi_pfn =
			page_to_pfn(virt_to_page(skb->data + offset));
		frags[count].pi_off =
			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
		size = min_t(unsigned int, firstfraglen,
			     PI_PAGE_SIZE - frags[count].pi_off);

		/* can take smallest of firstfraglen (what's left) OR
		 * bytes left in the page
		 */
		frags[count].pi_len = size;
		firstfraglen -= size;
		offset += size;
		count++;
	}
	if ((count + numfrags) > frags_max)
		return -EINVAL;
	for (frag = 0; frag < numfrags; frag++) {
		count = add_physinfo_entries(page_to_pfn(
				skb_frag_page(&skb_shinfo(skb)->frags[frag])),
					     skb_shinfo(skb)->frags[frag].
					     page_offset,
					     skb_shinfo(skb)->frags[frag].
					     size, count, frags_max, frags);
		/* add_physinfo_entries only returns
		 * zero if the frags array is out of room
		 * That should never happen because we
		 * fail above, if count + numfrags > frags_max.
		 */
		if (!count)
			return -EINVAL;
	}
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *skbinlist;
		int c;

		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
		     skbinlist = skbinlist->next) {
			c = visor_copy_fragsinfo_from_skb(skbinlist,
							  skbinlist->len -
							  skbinlist->data_len,
							  frags_max - count,
							  &frags[count]);
			if (c < 0)
				return c;
			count += c;
		}
	}
	return count;
}
static ssize_t enable_ints_write(struct file *file,
				 const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	/* Don't want to break ABI here by having a debugfs
	 * file that no longer exists or is writable, so
	 * let's just make this a vestigial function
	 */
	return count;
}

static const struct file_operations debugfs_enable_ints_fops = {
	.write = enable_ints_write,
};
/*
 * visornic_serverdown_complete - IOPART went down, pause device
 * @devdata: visornic device to pause
 *
 * The IO partition has gone down and we need to do some cleanup
 * for when it comes back. Treat the IO partition as the link
 * being down.
 */
static void
visornic_serverdown_complete(struct visornic_devdata *devdata)
{
	struct net_device *netdev;

	netdev = devdata->netdev;

	/* Stop polling for interrupts */
	del_timer_sync(&devdata->irq_poll_timer);

	rtnl_lock();
	dev_close(netdev);
	rtnl_unlock();

	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
	devdata->chstat.sent_xmit = 0;
	devdata->chstat.got_xmit_done = 0;

	if (devdata->server_down_complete_func)
		(*devdata->server_down_complete_func)(devdata->dev, 0);

	devdata->server_down = true;
	devdata->server_change_state = false;
	devdata->server_down_complete_func = NULL;
}
/*
 * visornic_serverdown - Command has notified us that IOPART is down
 * @devdata: device that is being managed by IOPART
 *
 * Schedule the work needed to handle the server down request. Make
 * sure we haven't already handled the server change state event.
 * Returns 0 if we scheduled the work, negative error code on error.
 */
static int
visornic_serverdown(struct visornic_devdata *devdata,
		    visorbus_state_complete_func complete_func)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		dev_dbg(&devdata->dev->device, "%s changing state\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->server_down) {
		dev_dbg(&devdata->dev->device, "%s already down\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->going_away) {
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		err = -ENODEV;
		goto err_unlock;
	}
	devdata->server_change_state = true;
	devdata->server_down_complete_func = complete_func;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	visornic_serverdown_complete(devdata);
	return 0;

err_unlock:
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	return err;
}
/*
 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
 * @netdev: network adapter the rcv bufs are attached to.
 *
 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 * so that it can write rcv data into our memory space.
 * Return pointer to sk_buff
 */
static struct sk_buff *
alloc_rcv_buf(struct net_device *netdev)
{
	struct sk_buff *skb;

	/* NOTE: the first fragment in each rcv buffer is pointed to by
	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
	 * in length, so the first frag is large enough to hold 1514.
	 */
	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
	if (!skb)
		return NULL;
	skb->dev = netdev;
	/* current value of mtu doesn't come into play here; large
	 * packets will just end up using multiple rcv buffers all of
	 * RCVPOST_BUF_SIZE in length.
	 */
	skb->len = RCVPOST_BUF_SIZE;
	/* alloc_skb already zeroes it out for clarification. */
	skb->data_len = 0;
	return skb;
}
/*
 * post_skb - post a skb to the IO Partition.
 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
 * @devdata: visornic_devdata to post the skb to
 * @skb: skb to give to the IO partition
 *
 * Send the skb to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
post_skb(struct uiscmdrsp *cmdrsp,
	 struct visornic_devdata *devdata, struct sk_buff *skb)
{
	int err;

	cmdrsp->net.buf = skb;
	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
	cmdrsp->net.rcvpost.frag.pi_off =
		(unsigned long)skb->data & PI_PAGE_MASK;
	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
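
	/* unique_num lets the IO Partition tell rcv buffers posted by this
	 * incarnation of the device apart from stale buffers posted by an
	 * earlier incarnation (see incarnation_id in visornic_devdata).
	 */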
	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
		return -EINVAL;

	cmdrsp->net.type = NET_RCV_POST;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART, cmdrsp);
	if (err) {
		devdata->chstat.sent_post_failed++;
		return err;
	}

	atomic_inc(&devdata->num_rcvbuf_in_iovm);
	devdata->chstat.sent_post++;
	return 0;
}
/*
 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
 * @netdev: netdevice we are enabling/disabling, used as context
 *	    return value
 * @state: enable = 1/disable = 0
 * @devdata: visornic device we are enabling/disabling
 *
 * Send the enable/disable message to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
send_enbdis(struct net_device *netdev, int state,
	    struct visornic_devdata *devdata)
{
	int err;

	devdata->cmdrsp_rcv->net.enbdis.enable = state;
	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART,
					devdata->cmdrsp_rcv);
	if (err)
		return err;
	devdata->chstat.sent_enbdis++;
	return 0;
}
/*
 * visornic_disable_with_timeout - Disable network adapter
 * @netdev: netdevice to disable
 * @timeout: timeout to wait for disable
 *
 * Disable the network adapter and inform the IO Partition that we
 * are disabled, reclaim memory from rcv bufs.
 * Returns 0 on success, negative on failure of IO Partition
 * responding.
 */
static int
visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int i;
	unsigned long flags;
	int wait = 0;
	int err;

	/* send a msg telling the other end we are stopping incoming pkts */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 0;
	devdata->enab_dis_acked = 0; /* must wait for ack */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send disable and wait for ack -- don't hold lock when sending
	 * disable because if the queue is full, insert might sleep.
	 * If an error occurs, don't wait for the timeout.
	 */
	err = send_enbdis(netdev, 0, devdata);
	if (err)
		return err;

	/* wait for ack to arrive before we try to free rcv buffers
	 * NOTE: the other end automatically unposts the rcv buffers
	 * when it gets a disable.
	 */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	/* Wait for usage to go to 1 (no other users) before freeing
	 * rcv buffers
	 */
	if (atomic_read(&devdata->usage) > 1) {
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			schedule_timeout(msecs_to_jiffies(10));
			spin_lock_irqsave(&devdata->priv_lock, flags);
			if (atomic_read(&devdata->usage))
				break;
		}
	}

	/* we've set enabled to 0, so we can give up the lock. */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* stop the transmit queue so nothing more can be transmitted */
	netif_stop_queue(netdev);

	napi_disable(&devdata->napi);
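
	/* skbs queued on xmitbufhead are awaiting NET_XMIT_DONE responses
	 * from the IO Partition; with the partition down those acks will
	 * never arrive, so drop the queued skbs here.
	 */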
	skb_queue_purge(&devdata->xmitbufhead);

	/* Free rcv buffers - other end has automatically unposted them on
	 * disable
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		if (devdata->rcvbuf[i]) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
	}

	return 0;
}
/*
 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
 * @netdev: struct netdevice
 * @devdata: visornic_devdata
 *
 * Allocate rcv buffers and post them to the IO Partition.
 * Return 0 for success, and negative for failure.
 */
static int
init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
{
	int i, j, count, err;

	/* allocate fixed number of receive buffers to post to uisnic
	 * post receive buffers after we've allocated a required amount
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
		if (!devdata->rcvbuf[i])
			break; /* if we failed to allocate one let us stop */
	}
	if (i == 0) /* couldn't even allocate one -- bail out */
		return -ENOMEM;
	count = i;

	/* Ensure we can alloc 2/3rd of the requested number of buffers.
	 * 2/3 is an arbitrary choice; used also in ndis init.c
	 */
	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
		/* free receive buffers we did alloc and then bail out */
		for (i = 0; i < count; i++) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
		return -ENOMEM;
	}

	/* post receive buffers to receive incoming input - without holding
	 * lock - we've not enabled nor started the queue so there shouldn't
	 * be any rcv or xmit activity
	 */
	for (i = 0; i < count; i++) {
		err = post_skb(devdata->cmdrsp_rcv, devdata,
			       devdata->rcvbuf[i]);
		if (err) {
			/* If we posted at least one skb, we should return
			 * success, but need to free the resources that we
			 * have not successfully posted.
			 */
			for (j = i; j < count; j++) {
				kfree_skb(devdata->rcvbuf[j]);
				devdata->rcvbuf[j] = NULL;
			}
			if (i)
				return 0;
			return err;
		}
	}

	return 0;
}
/*
 * visornic_enable_with_timeout - send enable to IO Part
 * @netdev: struct net_device
 * @timeout: Time to wait for the ACK from the enable
 *
 * Sends enable to IOVM, inits, and posts receive buffers to IOVM
 * timeout is defined in msecs (timeout of 0 specifies infinite wait)
 * Return 0 for success, negative for failure.
 */
static int
visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
{
	int err = 0;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;
	int wait = 0;

	napi_enable(&devdata->napi);

	/* NOTE: the other end automatically unposts the rcv buffers when it
	 * gets a disable.
	 */
	err = init_rcv_bufs(netdev, devdata);
	if (err < 0) {
		dev_err(&netdev->dev,
			"%s failed to init rcv bufs\n", __func__);
		return err;
	}

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 1;
	devdata->enab_dis_acked = 0;

	/* now we're ready, let's send an ENB to uisnic but until we get
	 * an ACK back from uisnic, we'll drop the packets
	 */
	devdata->n_rcv_packets_not_accepted = 0;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send enable and wait for ack -- don't hold lock when sending enable
	 * because if the queue is full, insert might sleep. If an error
	 * occurs, don't wait for the timeout.
	 */
	err = send_enbdis(netdev, 1, devdata);
	if (err)
		return err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	if (!devdata->enab_dis_acked) {
		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
		return -EIO;
	}

	netif_start_queue(netdev);
	return 0;
}
/*
 * visornic_timeout_reset - handle xmit timeout resets
 * @work: work item that scheduled the work
 *
 * Transmit timeouts are typically handled by resetting the
 * device. For our virtual NIC we will send a disable and enable
 * to the IOVM. If it doesn't respond, we will trigger a serverdown.
 */
static void
visornic_timeout_reset(struct work_struct *work)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	int response = 0;

	devdata = container_of(work, struct visornic_devdata, timeout_reset);
	netdev = devdata->netdev;

	rtnl_lock();
	if (!netif_running(netdev)) {
		rtnl_unlock();
		return;
	}

	response = visornic_disable_with_timeout(netdev,
						 VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	response = visornic_enable_with_timeout(netdev,
						VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	rtnl_unlock();
	return;

call_serverdown:
	visornic_serverdown(devdata, NULL);
	rtnl_unlock();
}
/*
 * visornic_open - Enable the visornic device and mark the queue started
 * @netdev: netdevice to start
 *
 * Enable the device and start the transmit queue.
 * Return 0 for success
 */
static int
visornic_open(struct net_device *netdev)
{
	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
	return 0;
}
/*
 * visornic_close - Disables the visornic device and stops the queues
 * @netdev: netdevice to stop
 *
 * Disable the device and stop the transmit queue.
 * Return 0 for success
 */
static int
visornic_close(struct net_device *netdev)
{
	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
	return 0;
}
/*
 * devdata_xmits_outstanding - compute outstanding xmits
 * @devdata: visornic_devdata for device
 *
 * Return value is the number of outstanding xmits.
 */
static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
{
	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
		return devdata->chstat.sent_xmit -
			devdata->chstat.got_xmit_done;
	return (ULONG_MAX - devdata->chstat.got_xmit_done
		+ devdata->chstat.sent_xmit + 1);
}
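
/* Example of the wrap handling above: if sent_xmit has wrapped around to 2
 * while got_xmit_done is still ULONG_MAX - 1, the second branch computes
 * (ULONG_MAX - (ULONG_MAX - 1)) + 2 + 1 = 4 outstanding xmits, so the
 * counter wrap is harmless.
 */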
/*
 * vnic_hit_high_watermark
 * @devdata: indicates visornic device we are checking
 * @high_watermark: max num of unacked xmits we will tolerate
 *                  before we will start throttling
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is >= high_watermark.
 */
static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
				    ulong high_watermark)
{
	return (devdata_xmits_outstanding(devdata) >= high_watermark);
}
/*
 * vnic_hit_low_watermark
 * @devdata: indicates visornic device we are checking
 * @low_watermark: we will wait until the num of unacked xmits
 *                 drops to this value or lower before we start
 *                 xmitting again
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is <= low_watermark.
 */
static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
				   ulong low_watermark)
{
	return (devdata_xmits_outstanding(devdata) <= low_watermark);
}
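
/* Together these two helpers implement hysteresis-style flow control:
 * visornic_xmit() stops the queue once outstanding xmits reach
 * upper_threshold_net_xmits, and service_resp_queue() wakes it again only
 * after completions drain the count down to lower_threshold_net_xmits.
 */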
/*
 * visornic_xmit - send a packet to the IO Partition
 * @skb: Packet to be sent
 * @netdev: net device the packet is being sent from
 *
 * Convert the skb to a cmdrsp so the IO Partition can understand it.
 * Send the XMIT command to the IO Partition for processing. This
 * function is protected from concurrent calls by a spinlock xmit_lock
 * in the net_device struct, but as soon as the function returns it
 * can be called again.
 * Returns NETDEV_TX_OK.
 */
static int
visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct visornic_devdata *devdata;
	int len, firstfraglen, padlen;
	struct uiscmdrsp *cmdrsp = NULL;
	unsigned long flags;
	int err;

	devdata = netdev_priv(netdev);
	spin_lock_irqsave(&devdata->priv_lock, flags);

	if (netif_queue_stopped(netdev) || devdata->server_down ||
	    devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - queue stopped\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* sk_buff struct is used to host network data throughout all the
	 * linux network subsystems
	 */
	len = skb->len;

	/* skb->len is the FULL length of data (including fragmentary portion)
	 * skb->data_len is the length of the fragment portion in frags
	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
	 * calculate the length of the first fragment that skb->data is
	 * pointing to
	 */
	firstfraglen = skb->len - skb->data_len;
	if (firstfraglen < ETH_HLEN) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - first frag too small (%d)\n",
			__func__, firstfraglen);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if ((len < ETH_MIN_PACKET_SIZE) &&
	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
		/* pad the packet out to minimum size */
		padlen = ETH_MIN_PACKET_SIZE - len;
		memset(&skb->data[len], 0, padlen);
		len += padlen;
		firstfraglen += padlen;
	}

	cmdrsp = devdata->xmit_cmdrsp;
	/* clear cmdrsp */
	memset(cmdrsp, 0, SIZEOF_CMDRSP);
	cmdrsp->net.type = NET_XMIT;
	cmdrsp->cmdtype = CMD_NET_TYPE;

	/* save the pointer to skb -- we'll need it for completion */
	cmdrsp->net.buf = skb;

	if (vnic_hit_high_watermark(devdata,
				    devdata->max_outstanding_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		devdata->chstat.reject_count++;
		if (!devdata->queuefullmsg_logged &&
		    ((devdata->chstat.reject_count & 0x3ff) == 1))
			devdata->queuefullmsg_logged = 1;
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - waiting for iovm to catch up\n",
			__func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	if (devdata->queuefullmsg_logged)
		devdata->queuefullmsg_logged = 0;

	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		cmdrsp->net.xmt.lincsum.valid = 1;
		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
		if (skb_transport_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.hrawoff =
				skb_transport_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
		}
		if (skb_network_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.nhrawoff =
				skb_network_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
		}
		cmdrsp->net.xmt.lincsum.csum = skb->csum;
	} else {
		cmdrsp->net.xmt.lincsum.valid = 0;
	}

	/* save off the length of the entire data packet */
	cmdrsp->net.xmt.len = len;

	/* copy ethernet header from first frag into cmdrsp
	 * - everything else will be passed in frags & DMA'ed
	 */
	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
	/* copy frags info - from skb->data we need to only provide access
	 * beyond eth header
	 */
	cmdrsp->net.xmt.num_frags =
		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
					      MAX_PHYS_INFO,
					      cmdrsp->net.xmt.frags);
	if (cmdrsp->net.xmt.num_frags < 0) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - copy frags failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART, cmdrsp);
	if (err) {
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - signalinsert failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* Track the skbs that have been sent to the IOVM for XMIT */
	skb_queue_head(&devdata->xmitbufhead, skb);

	/* update xmt stats */
	devdata->net_stats.tx_packets++;
	devdata->net_stats.tx_bytes += skb->len;
	devdata->chstat.sent_xmit++;

	/* check if we have hit the high watermark for netif_stop_queue() */
	if (vnic_hit_high_watermark(devdata,
				    devdata->upper_threshold_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		/* stop queue - call netif_wake_queue() after lower threshold */
		netif_stop_queue(netdev);
		dev_dbg(&netdev->dev,
			"%s busy - invoking iovm flow control\n",
			__func__);
		devdata->flow_control_upper_hits++;
	}
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* skb will be freed when we get back NET_XMIT_DONE */
	return NETDEV_TX_OK;
}
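
/* Note that visornic_xmit() always returns NETDEV_TX_OK: on every busy or
 * error path it frees the skb and drops the packet itself rather than
 * returning NETDEV_TX_BUSY, so the stack never requeues to this driver.
 */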
/*
 * visornic_get_stats - returns net_stats of the visornic device
 * @netdev: netdevice
 *
 * Returns the net_device_stats for the device
 */
static struct net_device_stats *
visornic_get_stats(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);

	return &devdata->net_stats;
}
/*
 * visornic_change_mtu - changes mtu of device.
 * @netdev: netdevice
 * @new_mtu: value of new mtu
 *
 * MTU cannot be changed by system; it must be changed via a
 * CONTROLVM message. All vnics and pnics in a switch have
 * to have the same MTU for everything to work.
 * Currently not supported.
 */
static int
visornic_change_mtu(struct net_device *netdev, int new_mtu)
{
	return -EINVAL;
}
/*
 * visornic_set_multi - set visornic device flags
 * @netdev: netdevice
 *
 * The only flag we currently support is IFF_PROMISC
 */
static void
visornic_set_multi(struct net_device *netdev)
{
	struct uiscmdrsp *cmdrsp;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int err = 0;

	if (devdata->old_flags == netdev->flags)
		return;

	if ((netdev->flags & IFF_PROMISC) ==
	    (devdata->old_flags & IFF_PROMISC))
		goto out_save_flags;

	devdata->chstat.sent_promisc++;
	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!cmdrsp)
		return;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	cmdrsp->net.type = NET_RCV_PROMISC;
	cmdrsp->net.enbdis.context = netdev;
	cmdrsp->net.enbdis.enable =
		netdev->flags & IFF_PROMISC;
	err = visorchannel_signalinsert(devdata->dev->visorchannel,
					IOCHAN_TO_IOPART,
					cmdrsp);
	kfree(cmdrsp);
	if (err)
		return;

out_save_flags:
	devdata->old_flags = netdev->flags;
}
/*
 * visornic_xmit_timeout - request to timeout the xmit
 * @netdev: netdevice
 *
 * Queue the work and return. Make sure we have not already
 * been informed that the IO Partition is gone; if it is gone
 * we will already timeout the xmits.
 */
static void
visornic_xmit_timeout(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		return;
	}

	/* Ensure that a ServerDown message hasn't been received */
	if (!devdata->enabled ||
	    (devdata->server_down && !devdata->server_change_state)) {
		dev_dbg(&netdev->dev, "%s no processing\n",
			__func__);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		return;
	}
	schedule_work(&devdata->timeout_reset);
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/*
 * repost_return - repost rcv bufs that have come back
 * @cmdrsp: io channel command struct to post
 * @devdata: visornic devdata for the device
 * @skb: skb that was returned to us
 * @netdev: netdevice
 *
 * Repost rcv buffers that have been returned to us when
 * we are finished with them.
 * Returns 0 for success, negative error code for error.
 */
static int
repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
	      struct sk_buff *skb, struct net_device *netdev)
{
	struct net_pkt_rcv copy;
	int i = 0, cc, numreposted;
	int found_skb = 0;
	int status = 0;

	copy = cmdrsp->net.rcv;
	switch (copy.numrcvbufs) {
	case 0:
		devdata->n_rcv0++;
		break;
	case 1:
		devdata->n_rcv1++;
		break;
	case 2:
		devdata->n_rcv2++;
		break;
	default:
		devdata->n_rcvx++;
		break;
	}
	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
				continue;

			if ((skb) && devdata->rcvbuf[i] == skb) {
				devdata->found_repost_rcvbuf_cnt++;
				found_skb = 1;
				devdata->repost_found_skb_cnt++;
			}
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->num_rcv_bufs_could_not_alloc++;
				devdata->alloc_failed_in_repost_rtn_cnt++;
				status = -ENOMEM;
				break;
			}
			status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			if (status) {
				kfree_skb(devdata->rcvbuf[i]);
				devdata->rcvbuf[i] = NULL;
				break;
			}
			numreposted++;
			break;
		}
	}
	if (numreposted != copy.numrcvbufs) {
		devdata->n_repost_deficit++;
		status = -EINVAL;
	}
	if (skb) {
		if (found_skb) {
			kfree_skb(skb);
		} else {
			status = -EINVAL;
			devdata->bad_rcv_buf++;
		}
	}
	return status;
}
/*
 * visornic_rx - Handle receive packets coming back from IO Part
 * @cmdrsp: Receive packet returned from IO Part
 *
 * Got a receive packet back from the IO Part, handle it and send
 * it up the stack.
 * Returns 1 iff an skb was received, otherwise 0
 */
static int
visornic_rx(struct uiscmdrsp *cmdrsp)
{
	struct visornic_devdata *devdata;
	struct sk_buff *skb, *prev, *curr;
	struct net_device *netdev;
	int cc, currsize, off;
	struct ethhdr *eth;
	unsigned long flags;

	/* post new rcv buf to the other end using the cmdrsp we have at hand
	 * post it without holding lock - but we'll use the signal lock to
	 * synchronize the queue insert; the cmdrsp that contains the net.rcv
	 * is the one we are using to repost, so copy the info we need from it.
	 */
	skb = cmdrsp->net.buf;
	netdev = skb->dev;

	devdata = netdev_priv(netdev);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	atomic_dec(&devdata->num_rcvbuf_in_iovm);

	/* set length to how much was ACTUALLY received -
	 * NOTE: rcv_done_len includes actual length of data rcvd
	 * including ethhdr
	 */
	skb->len = cmdrsp->net.rcv.rcv_done_len;

	/* update rcv stats - call it with priv_lock held */
	devdata->net_stats.rx_packets++;
	devdata->net_stats.rx_bytes += skb->len;

	/* test enabled while holding lock */
	if (!(devdata->enabled && devdata->enab_dis_acked)) {
		/* don't process it unless we're in enable mode and until
		 * we've gotten an ACK saying the other end got our RCV enable
		 */
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* when skb was allocated, skb->dev, skb->data, skb->len and
	 * skb->data_len were setup. AND, data has already been put into the
	 * skb (both first frag and in frags pages)
	 * NOTE: firstfragslen is the amount of data in skb->data and that
	 * which is not in nr_frags or frag_list. This is now simply
	 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
	 * firstfrag & set data_len to show the rest; see if we have to chain
	 * frag_list.
	 */
	if (skb->len > RCVPOST_BUF_SIZE) {	/* do PRECAUTIONARY check */
		if (cmdrsp->net.rcv.numrcvbufs < 2) {
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		/* length rcvd is greater than firstfrag in this skb rcv buf */
		skb->tail += RCVPOST_BUF_SIZE;	/* amount in skb->data */
		skb->data_len = skb->len - RCVPOST_BUF_SIZE; /* amount that
							      * will be in
							      * frag_list
							      */
	} else {
		/* data fits in this skb - no chaining - do
		 * PRECAUTIONARY check
		 */
		if (cmdrsp->net.rcv.numrcvbufs != 1) {	/* should be 1 */
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		skb->tail += skb->len;
		skb->data_len = 0;	/* nothing rcvd in frag_list */
	}
	off = skb_tail_pointer(skb) - skb->data;

	/* amount we bumped tail by in the head skb
	 * it is used to calculate the size of each chained skb below
	 * it is also used to index into bufline to continue the copy
	 * (for chansocktwopc)
	 * if necessary chain the rcv skbs together.
	 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
	 * chain the rest to that one.
	 * - do PRECAUTIONARY check
	 */
	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
			dev_err(&devdata->netdev->dev, "repost_return failed");
		return 0;
	}

	if (cmdrsp->net.rcv.numrcvbufs > 1) {
		/* chain the various rcv buffers into the skb's frag_list. */
		/* Note: off was initialized above */
		for (cc = 1, prev = NULL;
		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
			curr->next = NULL;
			if (!prev)	/* start of list - set head */
				skb_shinfo(skb)->frag_list = curr;
			else
				prev->next = curr;
			prev = curr;

			/* should we set skb->len and skb->data_len for each
			 * buffer being chained??? can't hurt!
			 */
			currsize = min(skb->len - off,
				       (unsigned int)RCVPOST_BUF_SIZE);
			curr->len = currsize;
			curr->tail += currsize;
			off += currsize;
		}
		/* assert skb->len == off */
		if (skb->len != off) {
			netdev_err(devdata->netdev,
				   "something wrong; skb->len:%d != off:%d\n",
				   skb->len, off);
		}
	}

	/* set up packet's protocol type using ethernet header - this
	 * sets up skb->pkt_type & it also PULLS out the eth header
	 */
	skb->protocol = eth_type_trans(skb, netdev);
	eth = eth_hdr(skb);
	skb->csum = 0;
	skb->ip_summed = CHECKSUM_NONE;

	do {
		if (netdev->flags & IFF_PROMISC)
			break;	/* accept all packets */
		if (skb->pkt_type == PACKET_BROADCAST) {
			if (netdev->flags & IFF_BROADCAST)
				break;	/* accept all broadcast packets */
		} else if (skb->pkt_type == PACKET_MULTICAST) {
			if ((netdev->flags & IFF_MULTICAST) &&
			    (netdev_mc_count(netdev))) {
				struct netdev_hw_addr *ha;
				int found_mc = 0;

				/* only accept multicast packets that we can
				 * find in our multicast address list
				 */
				netdev_for_each_mc_addr(ha, netdev) {
					if (ether_addr_equal(eth->h_dest,
							     ha->addr)) {
						found_mc = 1;
						break;
					}
				}
				/* accept pkt, dest matches a multicast addr */
				if (found_mc)
					break;
			}
		/* accept packet, h_dest must match vnic mac address */
		} else if (skb->pkt_type == PACKET_HOST) {
			break;
		} else if (skb->pkt_type == PACKET_OTHERHOST) {
			/* something is not right */
			dev_err(&devdata->netdev->dev,
				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
				netdev->name, eth->h_dest, netdev->dev_addr);
		}
		/* drop packet - don't forward it up to OS */
		devdata->n_rcv_packets_not_accepted++;
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	} while (0);

	netif_receive_skb(skb);
	/* netif_rx returns various values, but "in practice most drivers
	 * ignore the return value"
	 */

	skb = NULL;
	/* whether the packet got dropped or handled, the skb is freed by
	 * kernel code, so we shouldn't free it. but we should repost a
	 * new rcv buffer.
	 */
	repost_return(cmdrsp, devdata, skb, netdev);
	return 1;
}
/*
 * devdata_initialize - Initialize devdata structure
 * @devdata: visornic_devdata structure to initialize
 * @dev: visorbus device it belongs to
 *
 * Setup initial values for the visornic based on channel and default
 * values.
 * Returns a pointer to the devdata structure
 */
static struct visornic_devdata *
devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
{
	devdata->dev = dev;
	devdata->incarnation_id = get_jiffies_64();
	return devdata;
}
/*
 * devdata_release - Frees up references in devdata
 * @devdata: struct to clean up
 *
 * Frees up references in devdata.
 */
static void devdata_release(struct visornic_devdata *devdata)
{
	kfree(devdata->rcvbuf);
	kfree(devdata->cmdrsp_rcv);
	kfree(devdata->xmit_cmdrsp);
}
static const struct net_device_ops visornic_dev_ops = {
	.ndo_open = visornic_open,
	.ndo_stop = visornic_close,
	.ndo_start_xmit = visornic_xmit,
	.ndo_get_stats = visornic_get_stats,
	.ndo_change_mtu = visornic_change_mtu,
	.ndo_tx_timeout = visornic_xmit_timeout,
	.ndo_set_rx_mode = visornic_set_multi,
};
static ssize_t info_debugfs_read(struct file *file, char __user *buf,
				 size_t len, loff_t *offset)
{
	ssize_t bytes_read = 0;
	int str_pos = 0;
	struct visornic_devdata *devdata;
	struct net_device *dev;
	char *vbuf;

	if (len > MAX_BUF)
		len = MAX_BUF;
	vbuf = kzalloc(len, GFP_KERNEL);
	if (!vbuf)
		return -ENOMEM;

	/* for each vnic channel dump out channel specific data */
	rcu_read_lock();
	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
		/* Only consider netdevs that are visornic, and are open */
		if ((dev->netdev_ops != &visornic_dev_ops) ||
		    (!netif_queue_stopped(dev)))
			continue;

		devdata = netdev_priv(dev);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "netdev = %s (0x%p), MAC Addr %pM\n",
				     dev->name,
				     dev,
				     dev->dev_addr);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "VisorNic Dev Info = 0x%p\n", devdata);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcv_bufs = %d\n",
				     devdata->num_rcv_bufs);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " max_outstanding_net_xmits = %lu\n",
				     devdata->max_outstanding_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " upper_threshold_net_xmits = %lu\n",
				     devdata->upper_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " lower_threshold_net_xmits = %lu\n",
				     devdata->lower_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " queuefullmsg_logged = %d\n",
				     devdata->queuefullmsg_logged);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_rcv = %lu\n",
				     devdata->chstat.got_rcv);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_enbdisack = %lu\n",
				     devdata->chstat.got_enbdisack);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_xmit_done = %lu\n",
				     devdata->chstat.got_xmit_done);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.xmit_fail = %lu\n",
				     devdata->chstat.xmit_fail);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_enbdis = %lu\n",
				     devdata->chstat.sent_enbdis);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_promisc = %lu\n",
				     devdata->chstat.sent_promisc);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post = %lu\n",
				     devdata->chstat.sent_post);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post_failed = %lu\n",
				     devdata->chstat.sent_post_failed);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_xmit = %lu\n",
				     devdata->chstat.sent_xmit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.reject_count = %lu\n",
				     devdata->chstat.reject_count);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.extra_rcvbufs_sent = %lu\n",
				     devdata->chstat.extra_rcvbufs_sent);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv0 = %lu\n", devdata->n_rcv0);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv1 = %lu\n", devdata->n_rcv1);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv2 = %lu\n", devdata->n_rcv2);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcvx = %lu\n", devdata->n_rcvx);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcvbuf_in_iovm = %d\n",
				     atomic_read(&devdata->num_rcvbuf_in_iovm));
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_if_needed_cnt = %lu\n",
				     devdata->alloc_failed_in_if_needed_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
				     devdata->alloc_failed_in_repost_rtn_cnt);
		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
		 *		     " inner_loop_limit_reached_cnt = %lu\n",
		 *		     devdata->inner_loop_limit_reached_cnt);
		 */
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " found_repost_rcvbuf_cnt = %lu\n",
				     devdata->found_repost_rcvbuf_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " repost_found_skb_cnt = %lu\n",
				     devdata->repost_found_skb_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_repost_deficit = %lu\n",
				     devdata->n_repost_deficit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " bad_rcv_buf = %lu\n",
				     devdata->bad_rcv_buf);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv_packets_not_accepted = %lu\n",
				     devdata->n_rcv_packets_not_accepted);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_rcvd = %llu\n",
				     devdata->interrupts_rcvd);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_notme = %llu\n",
				     devdata->interrupts_notme);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_disabled = %llu\n",
				     devdata->interrupts_disabled);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " busy_cnt = %llu\n",
				     devdata->busy_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_upper_hits = %llu\n",
				     devdata->flow_control_upper_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_lower_hits = %llu\n",
				     devdata->flow_control_lower_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " netif_queue = %s\n",
				     netif_queue_stopped(devdata->netdev) ?
				     "stopped" : "running");
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " xmits_outstanding = %lu\n",
				     devdata_xmits_outstanding(devdata));
	}
	rcu_read_unlock();
	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
	kfree(vbuf);
	return bytes_read;
}
static struct dentry *visornic_debugfs_dir;
static const struct file_operations debugfs_info_fops = {
	.read = info_debugfs_read,
};
/*
 * send_rcv_posts_if_needed
 * @devdata: visornic device
 *
 * Send receive buffers to the IO Partition.
 * Returns 0 on success, negative error code on failure.
 */
static int
send_rcv_posts_if_needed(struct visornic_devdata *devdata)
{
	struct net_device *netdev;
	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
	int i;
	int err;

	/* don't do this until vnic is marked ready */
	if (!(devdata->enabled && devdata->enab_dis_acked))
		return 0;

	netdev = devdata->netdev;
	rcv_bufs_allocated = 0;
	/* this code is trying to prevent getting stuck here forever,
	 * but still retry it if you can't allocate them all this time.
	 */
	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
	while (cur_num_rcv_bufs_to_alloc > 0) {
		cur_num_rcv_bufs_to_alloc--;
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i])
				continue;
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->alloc_failed_in_if_needed_cnt++;
				break;
			}
			rcv_bufs_allocated++;
			err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			if (err) {
				kfree_skb(devdata->rcvbuf[i]);
				devdata->rcvbuf[i] = NULL;
				break;
			}
			devdata->chstat.extra_rcvbufs_sent++;
		}
	}
	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
	return 0;
}
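
/* Each allocation failure recorded in num_rcv_bufs_could_not_alloc earns
 * one retry pass through the loop above on a later poll, so a transient
 * GFP_ATOMIC failure heals itself the next time visornic_poll() runs.
 */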
/*
 * drain_resp_queue - drains and ignores all messages from the resp queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 */
static void
drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
{
	while (!visorchannel_signalremove(devdata->dev->visorchannel,
					  IOCHAN_FROM_IOPART,
					  cmdrsp))
		;
}
/*
 * service_resp_queue - drains the response queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 *
 * Drain the response queue of any responses from the IO partition.
 * Process the responses as we get them.
 * Returns when response queue is empty or when the thread stops.
 */
static void
service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
		   int *rx_work_done, int budget)
{
	unsigned long flags;
	struct net_device *netdev;

	while (*rx_work_done < budget) {
		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
		 * moment
		 */
		if (visorchannel_signalremove(devdata->dev->visorchannel,
					      IOCHAN_FROM_IOPART,
					      cmdrsp))
			break; /* queue empty */

		switch (cmdrsp->net.type) {
		case NET_RCV:
			devdata->chstat.got_rcv++;
			/* process incoming packet */
			*rx_work_done += visornic_rx(cmdrsp);
			break;
		case NET_XMIT_DONE:
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->chstat.got_xmit_done++;
			if (cmdrsp->net.xmtdone.xmt_done_result)
				devdata->chstat.xmit_fail++;
			/* only call queue wake if we stopped it */
			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
			/* ASSERT netdev == vnicinfo->netdev; */
			if ((netdev == devdata->netdev) &&
			    netif_queue_stopped(netdev)) {
				/* check if we have crossed the lower watermark
				 * for netif_wake_queue()
				 */
				if (vnic_hit_low_watermark
				    (devdata,
				     devdata->lower_threshold_net_xmits)) {
					/* enough NET_XMITs completed
					 * so can restart netif queue
					 */
					netif_wake_queue(netdev);
					devdata->flow_control_lower_hits++;
				}
			}
			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			kfree_skb(cmdrsp->net.buf);
			break;
		case NET_RCV_ENBDIS_ACK:
			devdata->chstat.got_enbdisack++;
			netdev = (struct net_device *)
				 cmdrsp->net.enbdis.context;
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->enab_dis_acked = 1;
			spin_unlock_irqrestore(&devdata->priv_lock, flags);

			if (devdata->server_down &&
			    devdata->server_change_state) {
				/* Inform Linux that the link is up */
				devdata->server_down = false;
				devdata->server_change_state = false;
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			}
			break;
		case NET_CONNECT_STATUS:
			netdev = devdata->netdev;
			if (cmdrsp->net.enbdis.enable == 1) {
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			} else {
				netif_stop_queue(netdev);
				netif_carrier_off(netdev);
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
			}
			break;
		default:
			break;
		}
		/* cmdrsp is now available for reuse */
	}
}
static int visornic_poll(struct napi_struct *napi, int budget)
{
	struct visornic_devdata *devdata = container_of(napi,
							struct visornic_devdata,
							napi);
	int rx_count = 0;
	int err;

	err = send_rcv_posts_if_needed(devdata);
	if (err)
		return rx_count;

	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);

	/* If there aren't any more packets to receive stop the poll */
	if (rx_count < budget)
		napi_complete_done(napi, rx_count);

	return rx_count;
}
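
/* Completing with rx_count < budget tells NAPI we are done polling; the
 * irq_poll_timer then re-schedules us, standing in for the interrupt
 * re-enable that a non-polling driver would do at this point.
 */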
/*
 * poll_for_irq - Checks the status of the response queue.
 * @v: void pointer to the visornic devdata
 *
 * Main function of the vnic_incoming thread. Periodically check the
 * response queue and drain it if needed.
 * Returns when thread has stopped.
 */
static void
poll_for_irq(unsigned long v)
{
	struct visornic_devdata *devdata = (struct visornic_devdata *)v;

	if (!visorchannel_signalempty(
				   devdata->dev->visorchannel,
				   IOCHAN_FROM_IOPART))
		napi_schedule(&devdata->napi);

	atomic_set(&devdata->interrupt_rcvd, 0);

	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
}
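
/* The channel is marked ULTRA_IO_CHANNEL_IS_POLLING in visornic_probe(),
 * so instead of a hardware interrupt this 2 ms timer checks the response
 * queue and schedules NAPI whenever work is pending.
 */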
/*
 * visornic_probe - probe function for visornic devices
 * @dev: The visor device discovered
 *
 * Called when visorbus discovers a visornic device on its
 * bus. It creates a new visornic ethernet adapter.
 * Returns 0 or negative for error.
 */
static int visornic_probe(struct visor_device *dev)
{
	struct visornic_devdata *devdata = NULL;
	struct net_device *netdev = NULL;
	int err = 0;
	int channel_offset = 0;
	u64 features;

	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
	if (!netdev) {
		dev_err(&dev->device,
			"%s alloc_etherdev failed\n", __func__);
		return -ENOMEM;
	}

	netdev->netdev_ops = &visornic_dev_ops;
	netdev->watchdog_timeo = 5 * HZ;
	SET_NETDEV_DEV(netdev, &dev->device);

	/* Get MAC address from channel and read it into the device. */
	netdev->addr_len = ETH_ALEN;
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.macaddr);
	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
				    ETH_ALEN);
	if (err < 0) {
		dev_err(&dev->device,
			"%s failed to get mac addr from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata = devdata_initialize(netdev_priv(netdev), dev);
	if (!devdata) {
		dev_err(&dev->device,
			"%s devdata_initialize failed\n", __func__);
		err = -ENOMEM;
		goto cleanup_netdev;
	}
	/* don't trust messages laying around in the channel */
	drain_resp_queue(devdata->cmdrsp, devdata);

	devdata->netdev = netdev;
	dev_set_drvdata(&dev->device, devdata);
	init_waitqueue_head(&devdata->rsp_queue);
	spin_lock_init(&devdata->priv_lock);
	devdata->enabled = 0; /* not yet */
	atomic_set(&devdata->usage, 1);

	/* Setup rcv bufs */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.num_rcv_bufs);
	err = visorbus_read_channel(dev, channel_offset,
				    &devdata->num_rcv_bufs, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get #rcv bufs from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
				  sizeof(struct sk_buff *), GFP_KERNEL);
	if (!devdata->rcvbuf) {
		err = -ENOMEM;
		goto cleanup_netdev;
	}

	/* set the net_xmit outstanding threshold */
	/* always leave two slots open but you should have 3 at a minimum */
	/* note that max_outstanding_net_xmits must be > 0 */
	devdata->max_outstanding_net_xmits =
		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
	devdata->upper_threshold_net_xmits =
		max_t(unsigned long,
		      2, (devdata->max_outstanding_net_xmits - 1));
	devdata->lower_threshold_net_xmits =
		max_t(unsigned long,
		      1, (devdata->max_outstanding_net_xmits / 2));
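
	/* For example, with num_rcv_bufs = 60 (a hypothetical count, purely
	 * to illustrate the arithmetic) this yields
	 * max_outstanding_net_xmits = 18, upper_threshold_net_xmits = 17
	 * and lower_threshold_net_xmits = 9.
	 */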

	skb_queue_head_init(&devdata->xmitbufhead);

	/* create a cmdrsp we can use to post and unpost rcv buffers */
	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->cmdrsp_rcv) {
		err = -ENOMEM;
		goto cleanup_rcvbuf;
	}
	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->xmit_cmdrsp) {
		err = -ENOMEM;
		goto cleanup_cmdrsp_rcv;
	}
	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
	devdata->server_down = false;
	devdata->server_change_state = false;

	/* set the default mtu */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.mtu);
	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get mtu from chan (%d)\n",
			__func__, err);
		goto cleanup_xmit_cmdrsp;
	}

	/* TODO: Setup Interrupt information */
	/* Let's start our threads to get responses */
	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);

	setup_timer(&devdata->irq_poll_timer, poll_for_irq,
		    (unsigned long)devdata);
	/* Note: This timer has to start running before the while
	 * loop below because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));

	channel_offset = offsetof(struct spar_io_channel_protocol,
				  channel_header.features);
	err = visorbus_read_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get features from chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	features |= ULTRA_IO_CHANNEL_IS_POLLING;
	features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
	err = visorbus_write_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to set features in chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	/* Note: Interrupts have to be enabled before the while
	 * loop below because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	visorbus_enable_channel_interrupts(dev);

	err = register_netdev(netdev);
	if (err) {
		dev_err(&dev->device,
			"%s register_netdev failed (%d)\n", __func__, err);
		goto cleanup_napi_add;
	}

	/* create debug/sysfs directories */
	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
						      visornic_debugfs_dir);
	if (!devdata->eth_debugfs_dir) {
		dev_err(&dev->device,
			"%s debugfs_create_dir %s failed\n",
			__func__, netdev->name);
		err = -ENOMEM;
		goto cleanup_register_netdev;
	}

	dev_info(&dev->device, "%s success netdev=%s\n",
		 __func__, netdev->name);
	return 0;

cleanup_register_netdev:
	unregister_netdev(netdev);

cleanup_napi_add:
	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

cleanup_xmit_cmdrsp:
	kfree(devdata->xmit_cmdrsp);

cleanup_cmdrsp_rcv:
	kfree(devdata->cmdrsp_rcv);

cleanup_rcvbuf:
	kfree(devdata->rcvbuf);

cleanup_netdev:
	free_netdev(netdev);
	return err;
}
/*
 * host_side_disappeared - IO part is gone.
 * @devdata: device object
 *
 * IO partition servicing this device is gone; do cleanup.
 */
static void host_side_disappeared(struct visornic_devdata *devdata)
{
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->dev = NULL;	/* indicate device destroyed */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/*
 * visornic_remove - Called when visornic dev goes away
 * @dev: visornic device that is being removed
 *
 * Called when DEVICE_DESTROY gets called to remove device.
 */
static void visornic_remove(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
	struct net_device *netdev;
	unsigned long flags;

	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return;
	}
	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s already being removed\n", __func__);
		return;
	}
	devdata->going_away = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	netdev = devdata->netdev;
	if (!netdev) {
		dev_err(&dev->device, "%s no net device\n", __func__);
		return;
	}

	/* going_away prevents new items being added to the workqueues */
	cancel_work_sync(&devdata->timeout_reset);

	debugfs_remove_recursive(devdata->eth_debugfs_dir);

	unregister_netdev(netdev); /* this will call visornic_close() */

	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

	dev_set_drvdata(&dev->device, NULL);
	host_side_disappeared(devdata);
	devdata_release(devdata);
	free_netdev(netdev);
}
/*
 * visornic_pause - Called when IO Part disappears
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished.
 *
 * Called when the IO Partition has gone down. Need to free
 * up resources and wait for IO partition to come back. Mark
 * link as down and don't attempt any DMA. When we have freed
 * memory call the complete_func so that Command knows we are
 * done. If we don't call complete_func, IO part will never
 * recover.
 * Returns 0 for success.
 */
static int visornic_pause(struct visor_device *dev,
			  visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	visornic_serverdown(devdata, complete_func);
	return 0;
}
/*
 * visornic_resume - Called when IO part has recovered
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished
 *
 * Called when the IO partition has recovered. Reestablish
 * connection to the IO part and set the link up. Okay to do
 * anything now.
 * Returns 0 for success.
 */
static int visornic_resume(struct visor_device *dev,
			   visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	unsigned long flags;

	devdata = dev_get_drvdata(&dev->device);
	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return -EINVAL;
	}
	netdev = devdata->netdev;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server already changing state\n",
			__func__);
		return -EINVAL;
	}
	if (!devdata->server_down) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server not down\n", __func__);
		complete_func(dev, 0);
		return 0;
	}
	devdata->server_change_state = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* Must transition channel to ATTACHED state BEFORE
	 * we can start using the device again.
	 * TODO: State transitions
	 */
	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));

	rtnl_lock();
	dev_open(netdev);
	rtnl_unlock();

	complete_func(dev, 0);
	return 0;
}
/* This is used to tell the visorbus driver which types of visor devices
 * we support, and what functions to call when a visor device that we support
 * is attached or removed.
 */
static struct visor_driver visornic_driver = {
	.name = "visornic",
	.owner = THIS_MODULE,
	.channel_types = visornic_channel_types,
	.probe = visornic_probe,
	.remove = visornic_remove,
	.pause = visornic_pause,
	.resume = visornic_resume,
	.channel_interrupt = NULL,
};
/*
 * visornic_init - Init function
 *
 * Init function for the visornic driver. Do initial driver setup
 * and wait for devices.
 * Returns 0 for success, negative for error.
 */
static int visornic_init(void)
{
	struct dentry *ret;
	int err = -ENOMEM;

	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
	if (!visornic_debugfs_dir)
		return err;

	ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
				  &debugfs_info_fops);
	if (!ret)
		goto cleanup_debugfs;
	ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
				  NULL, &debugfs_enable_ints_fops);
	if (!ret)
		goto cleanup_debugfs;

	err = visorbus_register_visor_driver(&visornic_driver);
	if (err)
		goto cleanup_debugfs;

	return 0;

cleanup_debugfs:
	debugfs_remove_recursive(visornic_debugfs_dir);
	return err;
}
/*
 * visornic_cleanup - driver exit routine
 *
 * Unregister driver from the bus and free up memory.
 */
static void visornic_cleanup(void)
{
	visorbus_unregister_visor_driver(&visornic_driver);

	debugfs_remove_recursive(visornic_debugfs_dir);
}
module_init(visornic_init);
module_exit(visornic_cleanup);

MODULE_AUTHOR("Unisys");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");