drivers/atm/ambassador.c

   1 /*
   2   Madge Ambassador ATM Adapter driver.
   3   Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5   This program is free software; you can redistribute it and/or modify
   6   it under the terms of the GNU General Public License as published by
   7   the Free Software Foundation; either version 2 of the License, or
   8   (at your option) any later version.
   9
  10   This program is distributed in the hope that it will be useful,
  11   but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13   GNU General Public License for more details.
  14
  15   You should have received a copy of the GNU General Public License
  16   along with this program; if not, write to the Free Software
  17   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19   The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20   system and in the file COPYING in the Linux kernel source.
  21 */
  22
  23 /* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25 #include <linux/module.h>
  26 #include <linux/types.h>
  27 #include <linux/pci.h>
  28 #include <linux/kernel.h>
  29 #include <linux/init.h>
  30 #include <linux/ioport.h>
  31 #include <linux/atmdev.h>
  32 #include <linux/delay.h>
  33 #include <linux/interrupt.h>
  34 #include <linux/poison.h>
  35
  36 #include <asm/atomic.h>
  37 #include <asm/io.h>
  38 #include <asm/byteorder.h>
  39
  40 #include "ambassador.h"
  41
  42 #define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  43 #define description_string "Madge ATM Ambassador driver"
  44 #define version_string "1.2.4"
  45
  46 static inline void __init show_version (void) {
  47   printk ("%s version %s\n", description_string, version_string);
  48 }
  49
  50 /*
  51
  52   Theory of Operation
  53
  54   I Hardware, detection, initialisation and shutdown.
  55
  56   1. Supported Hardware
  57
  58   This driver is for the PCI ATMizer-based Ambassador card (except
  59   very early versions). It is not suitable for the similar EISA "TR7"
  60   card. Commercially, both cards are known as Collage Server ATM
  61   adapters.
  62
  63   The loader supports image transfer to the card, image start and a few
  64   other miscellaneous commands.
  65
  66   Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  67
  68   The cards are big-endian.
  69
  70   2. Detection
  71
  72   Standard PCI stuff, the early cards are detected and rejected.
  73
  74   3. Initialisation
  75
  76   The cards are reset and the self-test results are checked. The
  77   microcode image is then transferred and started. This waits for a
  78   pointer to a descriptor containing details of the host-based queues
  79   and buffers and various parameters etc. Once they are processed
  80   normal operations may begin. The BIA is read using a microcode
  81   command.
  82
  83   4. Shutdown
  84
  85   This may be accomplished either by a card reset or via the microcode
  86   shutdown command. Further investigation required.
  87
  88   5. Persistent state
  89
  90   The card reset does not affect PCI configuration (good) or the
  91   contents of several other "shared run-time registers" (bad) which
  92   include doorbell and interrupt control as well as EEPROM and PCI
  93   control. The driver must be careful when modifying these registers
  94   not to touch bits it does not use and to undo any changes at exit.
  95
  96   II Driver software
  97
  98   0. Generalities
  99
 100   The adapter is quite intelligent (fast) and has a simple interface
 101   (few features). VPI is always zero, 1024 VCIs are supported. There
 102   is limited cell rate support. UBR channels can be capped and ABR
 103   (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 104   support.
 105
 106   1. Driver <-> Adapter Communication
 107
 108   Apart from the basic loader commands, the driver communicates
 109   through three entities: the command queue (CQ), the transmit queue
 110   pair (TXQ) and the receive queue pairs (RXQ). These three entities
 111   are set up by the host and passed to the microcode just after it has
 112   been started.
 113
 114   All queues are host-based circular queues. They are contiguous and
 115   (due to hardware limitations) have some restrictions as to their
 116   locations in (bus) memory. They are of the "full means the same as
 117   empty so don't do that" variety since the adapter uses pointers
 118   internally.
 119
 120   The queue pairs work as follows: one queue is for supply to the
 121   adapter, items in it are pending and are owned by the adapter; the
 122   other is the queue for return from the adapter, items in it have
 123   been dealt with by the adapter. The host adds items to the supply
 124   (TX descriptors and free RX buffer descriptors) and removes items
 125   from the return (TX and RX completions). The adapter deals with out
 126   of order completions.
 127
 128   Interrupts (card to host) and the doorbell (host to card) are used
 129   for signalling.
 130
 131   1. CQ
 132
 133   This is to communicate "open VC", "close VC", "get stats" etc. to
 134   the adapter. At most one command is retired every millisecond by the
 135   card. There is no out of order completion or notification. The
 136   driver needs to check the return code of the command, waiting as
 137   appropriate.
 138
 139   2. TXQ
 140
 141   TX supply items are of variable length (scatter gather support) and
 142   so the queue items are (more or less) pointers to the real thing.
 143   Each TX supply item contains a unique, host-supplied handle (the skb
 144   bus address seems most sensible as this works for Alphas as well,
 145   there is no need to do any endian conversions on the handles).
 146
 147   TX return items consist of just the handles above.
 148
 149   3. RXQ (up to 4 of these with different lengths and buffer sizes)
 150
 151   RX supply items consist of a unique, host-supplied handle (the skb
 152   bus address again) and a pointer to the buffer data area.
 153
 154   RX return items consist of the handle above, the VC, length and a
 155   status word. This just screams "oh so easy" doesn't it?
 156
 157   Note on RX pool sizes:
 158
 159   Each pool should have enough buffers to handle a back-to-back stream
 160   of minimum sized frames on a single VC. For example:
 161
 162     frame spacing = 3us (about right)
 163
 164     delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 165
 166     min number of buffers for one VC = 1 + delay/spacing (buffers)
 167
 168     delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 169
 170   The 20us delay assumes that there is no need to sleep; if we need to
 171   sleep to get buffers we are going to drop frames anyway.
 172
 173   In fact, each pool should have enough buffers to support the
 174   simultaneous reassembly of a separate frame on each VC and cope with
 175   the case in which frames complete in round robin cell fashion on
 176   each VC.
 177
 178   Only one frame can complete at each cell arrival, so if "n" VCs are
 179   open, the worst case is to have them all complete frames together
 180   followed by all starting new frames together.
 181
 182     desired number of buffers = n + delay/spacing
 183
 184   These are the extreme requirements, however, they are "n+k" for some
 185   "k" so we have only the constant to choose. This is the argument
 186   rx_lats which currently defaults to 7.
 187
 188   Actually, "n ? n+k : 0" is better and this is what is implemented,
 189   subject to the limit given by the pool size.
 190
 191   4. Driver locking
 192
 193   Simple spinlocks are used around the TX and RX queue mechanisms.
 194   Anyone with a faster, working method is welcome to implement it.
 195
 196   The adapter command queue is protected with a spinlock. We always
 197   wait for commands to complete.
 198
 199   A more complex form of locking is used around parts of the VC open
 200   and close functions. There are three reasons for a lock: 1. we need
 201   to do atomic rate reservation and release (not used yet), 2. Opening
 202   sometimes involves two adapter commands which must not be separated
 203   by another command on the same VC, 3. the changes to RX pool size
 204   must be atomic. The lock needs to work over context switches, so we
 205   use a semaphore.
 206
 207   III Hardware Features and Microcode Bugs
 208
 209   1. Byte Ordering
 210
 211   *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 212
 213   2. Memory access
 214
 215   All structures that are not accessed using DMA must be 4-byte
 216   aligned (not a problem) and must not cross 4MB boundaries.
 217
 218   There is a DMA memory hole at E0000000-E00000FF (groan).
 219
 220   TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 221   but for a hardware bug).
 222
 223   RX buffers (DMA write) must not cross 16MB boundaries and must
 224   include spare trailing bytes up to the next 4-byte boundary; they
 225   will be written with rubbish.
 226
 227   The PLX likes to prefetch; if reading up to 4 u32 past the end of
 228   each TX fragment is not a problem, then TX can be made to go a
 229   little faster by passing a flag at init that disables a prefetch
 230   workaround. We do not pass this flag. (new microcode only)
 231
 232   Now we:
 233   . Note that alloc_skb rounds up size to a 16byte boundary.
 234   . Ensure all areas do not traverse 4MB boundaries.
 235   . Ensure all areas do not start at a E00000xx bus address.
 236   (I cannot be certain, but this may always hold with Linux)
 237   . Make all failures cause a loud message.
 238   . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 239   . Discard non-conforming TX fragment descriptors (the TX fails).
 240   In the future we could:
 241   . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 242   . Segment TX areas into some/more fragments, when necessary.
 243   . Relax checks for non-DMA items (ignore hole).
 244   . Give scatter-gather (iovec) requirements using ???. (?)
 245
 246   3. VC close is broken (only for new microcode)
 247
 248   The VC close adapter microcode command fails to do anything if any
 249   frames have been received on the VC but none have been transmitted.
 250   Frames continue to be reassembled and passed (with IRQ) to the
 251   driver.
 252
 253   IV To Do List
 254
 255   . Fix bugs!
 256
 257   . Timer code may be broken.
 258
 259   . Deal with buggy VC close (somehow) in microcode 12.
 260
 261   . Handle interrupted and/or non-blocking writes - is this a job for
 262     the protocol layer?
 263
 264   . Add code to break up TX fragments when they span 4MB boundaries.
 265
 266   . Add SUNI phy layer (need to know where SUNI lives on card).
 267
 268   . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 269     leave extra headroom space for Ambassador TX descriptors.
 270
 271   . Understand these elements of struct atm_vcc: recvq (proto?),
 272     sleep, callback, listenq, backlog_quota, reply and user_back.
 273
 274   . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 275
 276   . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 277
 278   . Decide whether RX buffer recycling is or can be made completely safe;
 279     turn it back on. It looks like Werner is going to axe this.
 280
 281   . Implement QoS changes on open VCs (involves extracting parts of VC open
 282     and close into separate functions and using them to make changes).
 283
 284   . Hack on command queue so that someone can issue multiple commands and wait
 285     on the last one (OR only "no-op" or "wait" commands are waited for).
 286
 287   . Eliminate need for while-schedule around do_command.
 288
 289 */
 290
 291 /********** microcode **********/
 292
 293 #ifdef AMB_NEW_MICROCODE
 294 #define UCODE(x) UCODE2(atmsar12.x)
 295 #else
 296 #define UCODE(x) UCODE2(atmsar11.x)
 297 #endif
 298 #define UCODE2(x) #x
 299
 300 static u32 __devinitdata ucode_start =
 301 #include UCODE(start)
 302 ;
 303
 304 static region __devinitdata ucode_regions[] = {
 305 #include UCODE(regions)
 306   { 0, 0 }
 307 };
 308
 309 static u32 __devinitdata ucode_data[] = {
 310 #include UCODE(data)
 311   0xdeadbeef
 312 };
 313
 314 static void do_housekeeping (unsigned long arg);
 315 /********** globals **********/
 316
 317 static unsigned short debug = 0;
 318 static unsigned int cmds = 8;
 319 static unsigned int txs = 32;
 320 static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 321 static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 322 static unsigned int rx_lats = 7;
 323 static unsigned char pci_lat = 0;
 324
 325 static const unsigned long onegigmask = -1 << 30;
 326
 327 /********** access to adapter **********/
 328
 329 static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 330   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 331 #ifdef AMB_MMIO
 332   dev->membase[addr / sizeof(u32)] = data;
 333 #else
 334   outl (data, dev->iobase + addr);
 335 #endif
 336 }
 337
 338 static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 339 #ifdef AMB_MMIO
 340   u32 data = dev->membase[addr / sizeof(u32)];
 341 #else
 342   u32 data = inl (dev->iobase + addr);
 343 #endif
 344   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 345   return data;
 346 }
 347
 348 static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 349   __be32 be = cpu_to_be32 (data);
 350   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 351 #ifdef AMB_MMIO
 352   dev->membase[addr / sizeof(u32)] = be;
 353 #else
 354   outl (be, dev->iobase + addr);
 355 #endif
 356 }
 357
 358 static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 359 #ifdef AMB_MMIO
 360   __be32 be = dev->membase[addr / sizeof(u32)];
 361 #else
 362   __be32 be = inl (dev->iobase + addr);
 363 #endif
 364   u32 data = be32_to_cpu (be);
 365   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 366   return data;
 367 }
 368
 369 /********** dump routines **********/
 370
 371 static inline void dump_registers (const amb_dev * dev) {
 372 #ifdef DEBUG_AMBASSADOR
 373   if (debug & DBG_REGS) {
 374     size_t i;
 375     PRINTD (DBG_REGS, "reading PLX control: ");
 376     for (i = 0x00; i < 0x30; i += sizeof(u32))
 377       rd_mem (dev, i);
 378     PRINTD (DBG_REGS, "reading mailboxes: ");
 379     for (i = 0x40; i < 0x60; i += sizeof(u32))
 380       rd_mem (dev, i);
 381     PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 382     for (i = 0x60; i < 0x70; i += sizeof(u32))
 383       rd_mem (dev, i);
 384   }
 385 #else
 386   (void) dev;
 387 #endif
 388   return;
 389 }
 390
 391 static inline void dump_loader_block (volatile loader_block * lb) {
 392 #ifdef DEBUG_AMBASSADOR
 393   unsigned int i;
 394   PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 395            lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 396   for (i = 0; i < MAX_COMMAND_DATA; ++i)
 397     PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 398   PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 399 #else
 400   (void) lb;
 401 #endif
 402   return;
 403 }
 404
 405 static inline void dump_command (command * cmd) {
 406 #ifdef DEBUG_AMBASSADOR
 407   unsigned int i;
 408   PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 409            cmd, /*be32_to_cpu*/ (cmd->request));
 410   for (i = 0; i < 3; ++i)
 411     PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 412   PRINTDE (DBG_CMD, "");
 413 #else
 414   (void) cmd;
 415 #endif
 416   return;
 417 }
 418
 419 static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 420 #ifdef DEBUG_AMBASSADOR
 421   unsigned int i;
 422   unsigned char * data = skb->data;
 423   PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 424   for (i=0; i<skb->len && i < 256;i++)
 425     PRINTDM (DBG_DATA, "%02x ", data[i]);
 426   PRINTDE (DBG_DATA,"");
 427 #else
 428   (void) prefix;
 429   (void) vc;
 430   (void) skb;
 431 #endif
 432   return;
 433 }
 434
 435 /********** check memory areas for use by Ambassador **********/
 436
 437 /* see limitations under Hardware Features */
 438
 439 static inline int check_area (void * start, size_t length) {
 440   // assumes length > 0
 441   const u32 fourmegmask = -1 << 22;
 442   const u32 twofivesixmask = -1 << 8;
 443   const u32 starthole = 0xE0000000;
 444   u32 startaddress = virt_to_bus (start);
 445   u32 lastaddress = startaddress+length-1;
 446   if ((startaddress ^ lastaddress) & fourmegmask ||
 447       (startaddress & twofivesixmask) == starthole) {
 448     PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 449             startaddress, lastaddress);
 450     return -1;
 451   } else {
 452     return 0;
 453   }
 454 }
 455
 456 /********** free an skb (as per ATM device driver documentation) **********/
 457
 458 static inline void amb_kfree_skb (struct sk_buff * skb) {
 459   if (ATM_SKB(skb)->vcc->pop) {
 460     ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 461   } else {
 462     dev_kfree_skb_any (skb);
 463   }
 464 }
 465
 466 /********** TX completion **********/
 467
 468 static inline void tx_complete (amb_dev * dev, tx_out * tx) {
 469   tx_simple * tx_descr = bus_to_virt (tx->handle);
 470   struct sk_buff * skb = tx_descr->skb;
 471
 472   PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 473
 474   // VC layer stats
 475   atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 476
 477   // free the descriptor
 478   kfree (tx_descr);
 479
 480   // free the skb
 481   amb_kfree_skb (skb);
 482
 483   dev->stats.tx_ok++;
 484   return;
 485 }
 486
 487 /********** RX completion **********/
 488
 489 static void rx_complete (amb_dev * dev, rx_out * rx) {
 490   struct sk_buff * skb = bus_to_virt (rx->handle);
 491   u16 vc = be16_to_cpu (rx->vc);
 492   // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 493   u16 status = be16_to_cpu (rx->status);
 494   u16 rx_len = be16_to_cpu (rx->length);
 495
 496   PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 497
 498   // XXX move this in and add to VC stats ???
 499   if (!status) {
 500     struct atm_vcc * atm_vcc = dev->rxer[vc];
 501     dev->stats.rx.ok++;
 502
 503     if (atm_vcc) {
 504
 505       if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 506
 507         if (atm_charge (atm_vcc, skb->truesize)) {
 508
 509           // prepare socket buffer
 510           ATM_SKB(skb)->vcc = atm_vcc;
 511           skb_put (skb, rx_len);
 512
 513           dump_skb ("<<<", vc, skb);
 514
 515           // VC layer stats
 516           atomic_inc(&atm_vcc->stats->rx);
 517           __net_timestamp(skb);
 518           // end of our responsability
 519           atm_vcc->push (atm_vcc, skb);
 520           return;
 521
 522         } else {
 523           // someone fix this (message), please!
 524           PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 525           // drop stats incremented in atm_charge
 526         }
 527
 528       } else {
 529         PRINTK (KERN_INFO, "dropped over-size frame");
 530         // should we count this?
 531         atomic_inc(&atm_vcc->stats->rx_drop);
 532       }
 533
 534     } else {
 535       PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 536       // this is an adapter bug, only in new version of microcode
 537     }
 538
 539   } else {
 540     dev->stats.rx.error++;
 541     if (status & CRC_ERR)
 542       dev->stats.rx.badcrc++;
 543     if (status & LEN_ERR)
 544       dev->stats.rx.toolong++;
 545     if (status & ABORT_ERR)
 546       dev->stats.rx.aborted++;
 547     if (status & UNUSED_ERR)
 548       dev->stats.rx.unused++;
 549   }
 550
 551   dev_kfree_skb_any (skb);
 552   return;
 553 }
 554
 555 /*
 556
 557   Note on queue handling.
 558
 559   Here "give" and "take" refer to queue entries and a queue (pair)
 560   rather than frames to or from the host or adapter. Empty frame
 561   buffers are given to the RX queue pair and returned unused or
 562   containing RX frames. TX frames (well, pointers to TX fragment
 563   lists) are given to the TX queue pair, completions are returned.
 564
 565 */
 566
 567 /********** command queue **********/
 568
 569 // I really don't like this, but it's the best I can do at the moment
 570
 571 // also, the callers are responsible for byte order as the microcode
 572 // sometimes does 16-bit accesses (yuk yuk yuk)
 573
 574 static int command_do (amb_dev * dev, command * cmd) {
 575   amb_cq * cq = &dev->cq;
 576   volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 577   command * my_slot;
 578
 579   PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 580
 581   if (test_bit (dead, &dev->flags))
 582     return 0;
 583
 584   spin_lock (&cq->lock);
 585
 586   // if not full...
 587   if (cq->pending < cq->maximum) {
 588     // remember my slot for later
 589     my_slot = ptrs->in;
 590     PRINTD (DBG_CMD, "command in slot %p", my_slot);
 591
 592     dump_command (cmd);
 593
 594     // copy command in
 595     *ptrs->in = *cmd;
 596     cq->pending++;
 597     ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 598
 599     // mail the command
 600     wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 601
 602     if (cq->pending > cq->high)
 603       cq->high = cq->pending;
 604     spin_unlock (&cq->lock);
 605
 606     // these comments were in a while-loop before, msleep removes the loop
 607     // go to sleep
 608     // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 609     msleep(cq->pending);
 610
 611     // wait for my slot to be reached (all waiters are here or above, until...)
 612     while (ptrs->out != my_slot) {
 613       PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 614       set_current_state(TASK_UNINTERRUPTIBLE);
 615       schedule();
 616     }
 617
 618     // wait on my slot (... one gets to its slot, and... )
 619     while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 620       PRINTD (DBG_CMD, "wait: command slot completion");
 621       set_current_state(TASK_UNINTERRUPTIBLE);
 622       schedule();
 623     }
 624
 625     PRINTD (DBG_CMD, "command complete");
 626     // update queue (... moves the queue along to the next slot)
 627     spin_lock (&cq->lock);
 628     cq->pending--;
 629     // copy command out
 630     *cmd = *ptrs->out;
 631     ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 632     spin_unlock (&cq->lock);
 633
 634     return 0;
 635   } else {
 636     cq->filled++;
 637     spin_unlock (&cq->lock);
 638     return -EAGAIN;
 639   }
 640
 641 }
 642
 643 /********** TX queue pair **********/
 644
 645 static inline int tx_give (amb_dev * dev, tx_in * tx) {
 646   amb_txq * txq = &dev->txq;
 647   unsigned long flags;
 648
 649   PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 650
 651   if (test_bit (dead, &dev->flags))
 652     return 0;
 653
 654   spin_lock_irqsave (&txq->lock, flags);
 655
 656   if (txq->pending < txq->maximum) {
 657     PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 658
 659     *txq->in.ptr = *tx;
 660     txq->pending++;
 661     txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 662     // hand over the TX and ring the bell
 663     wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 664     wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 665
 666     if (txq->pending > txq->high)
 667       txq->high = txq->pending;
 668     spin_unlock_irqrestore (&txq->lock, flags);
 669     return 0;
 670   } else {
 671     txq->filled++;
 672     spin_unlock_irqrestore (&txq->lock, flags);
 673     return -EAGAIN;
 674   }
 675 }
 676
 677 static inline int tx_take (amb_dev * dev) {
 678   amb_txq * txq = &dev->txq;
 679   unsigned long flags;
 680
 681   PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 682
 683   spin_lock_irqsave (&txq->lock, flags);
 684
 685   if (txq->pending && txq->out.ptr->handle) {
 686     // deal with TX completion
 687     tx_complete (dev, txq->out.ptr);
 688     // mark unused again
 689     txq->out.ptr->handle = 0;
 690     // remove item
 691     txq->pending--;
 692     txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 693
 694     spin_unlock_irqrestore (&txq->lock, flags);
 695     return 0;
 696   } else {
 697
 698     spin_unlock_irqrestore (&txq->lock, flags);
 699     return -1;
 700   }
 701 }
 702
 703 /********** RX queue pairs **********/
 704
 705 static inline int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 706   amb_rxq * rxq = &dev->rxq[pool];
 707   unsigned long flags;
 708
 709   PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 710
 711   spin_lock_irqsave (&rxq->lock, flags);
 712
 713   if (rxq->pending < rxq->maximum) {
 714     PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 715
 716     *rxq->in.ptr = *rx;
 717     rxq->pending++;
 718     rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 719     // hand over the RX buffer
 720     wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 721
 722     spin_unlock_irqrestore (&rxq->lock, flags);
 723     return 0;
 724   } else {
 725     spin_unlock_irqrestore (&rxq->lock, flags);
 726     return -1;
 727   }
 728 }
 729
 730 static inline int rx_take (amb_dev * dev, unsigned char pool) {
 731   amb_rxq * rxq = &dev->rxq[pool];
 732   unsigned long flags;
 733
 734   PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 735
 736   spin_lock_irqsave (&rxq->lock, flags);
 737
 738   if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 739     // deal with RX completion
 740     rx_complete (dev, rxq->out.ptr);
 741     // mark unused again
 742     rxq->out.ptr->status = 0;
 743     rxq->out.ptr->length = 0;
 744     // remove item
 745     rxq->pending--;
 746     rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 747
 748     if (rxq->pending < rxq->low)
 749       rxq->low = rxq->pending;
 750     spin_unlock_irqrestore (&rxq->lock, flags);
 751     return 0;
 752   } else {
 753     if (!rxq->pending && rxq->buffers_wanted)
 754       rxq->emptied++;
 755     spin_unlock_irqrestore (&rxq->lock, flags);
 756     return -1;
 757   }
 758 }
 759
 760 /********** RX Pool handling **********/
 761
 762 /* pre: buffers_wanted = 0, post: pending = 0 */
 763 static inline void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 764   amb_rxq * rxq = &dev->rxq[pool];
 765
 766   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 767
 768   if (test_bit (dead, &dev->flags))
 769     return;
 770
 771   /* we are not quite like the fill pool routines as we cannot just
 772      remove one buffer, we have to remove all of them, but we might as
 773      well pretend... */
 774   if (rxq->pending > rxq->buffers_wanted) {
 775     command cmd;
 776     cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 777     cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 778     while (command_do (dev, &cmd))
 779       schedule();
 780     /* the pool may also be emptied via the interrupt handler */
 781     while (rxq->pending > rxq->buffers_wanted)
 782       if (rx_take (dev, pool))
 783         schedule();
 784   }
 785
 786   return;
 787 }
 788
 789 static void drain_rx_pools (amb_dev * dev) {
 790   unsigned char pool;
 791
 792   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 793
 794   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 795     drain_rx_pool (dev, pool);
 796 }
 797
 798 static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
 799                                  gfp_t priority)
 800 {
 801   rx_in rx;
 802   amb_rxq * rxq;
 803
 804   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 805
 806   if (test_bit (dead, &dev->flags))
 807     return;
 808
 809   rxq = &dev->rxq[pool];
 810   while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 811
 812     struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 813     if (!skb) {
 814       PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 815       return;
 816     }
 817     if (check_area (skb->data, skb->truesize)) {
 818       dev_kfree_skb_any (skb);
 819       return;
 820     }
 821     // cast needed as there is no %? for pointer differences
 822     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 823             skb, skb->head, (long) (skb->end - skb->head));
 824     rx.handle = virt_to_bus (skb);
 825     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 826     if (rx_give (dev, &rx, pool))
 827       dev_kfree_skb_any (skb);
 828
 829   }
 830
 831   return;
 832 }
 833
 834 // top up all RX pools (can also be called as a bottom half)
 835 static void fill_rx_pools (amb_dev * dev) {
 836   unsigned char pool;
 837
 838   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 839
 840   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 841     fill_rx_pool (dev, pool, GFP_ATOMIC);
 842
 843   return;
 844 }
 845
 846 /********** enable host interrupts **********/
 847
 848 static inline void interrupts_on (amb_dev * dev) {
 849   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 850             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 851             | AMB_INTERRUPT_BITS);
 852 }
 853
 854 /********** disable host interrupts **********/
 855
 856 static inline void interrupts_off (amb_dev * dev) {
 857   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 858             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 859             &~ AMB_INTERRUPT_BITS);
 860 }
 861
 862 /********** interrupt handling **********/
 863
 864 static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 865   amb_dev * dev = dev_id;
 866
 867   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 868
 869   {
 870     u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 871
 872     // for us or someone else sharing the same interrupt
 873     if (!interrupt) {
 874       PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 875       return IRQ_NONE;
 876     }
 877
 878     // definitely for us
 879     PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 880     wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 881   }
 882
 883   {
 884     unsigned int irq_work = 0;
 885     unsigned char pool;
 886     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 887       while (!rx_take (dev, pool))
 888         ++irq_work;
 889     while (!tx_take (dev))
 890       ++irq_work;
 891
 892     if (irq_work) {
 893 #ifdef FILL_RX_POOLS_IN_BH
 894       schedule_work (&dev->bh);
 895 #else
 896       fill_rx_pools (dev);
 897 #endif
 898
 899       PRINTD (DBG_IRQ, "work done: %u", irq_work);
 900     } else {
 901       PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 902     }
 903   }
 904
 905   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 906   return IRQ_HANDLED;
 907 }
 908
 909 /********** make rate (not quite as much fun as Horizon) **********/
 910
 911 static int make_rate (unsigned int rate, rounding r,
 912                       u16 * bits, unsigned int * actual) {
 913   unsigned char exp = -1; // hush gcc
 914   unsigned int man = -1;  // hush gcc
 915
 916   PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 917
 918   // rates in cells per second, ITU format (nasty 16-bit floating-point)
 919   // given 5-bit e and 9-bit m:
 920   // rate = EITHER (1+m/2^9)*2^e    OR 0
 921   // bits = EITHER 1<<14 | e<<9 | m OR 0
 922   // (bit 15 is "reserved", bit 14 "non-zero")
 923   // smallest rate is 0 (special representation)
 924   // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 925   // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 926   // simple algorithm:
 927   // find position of top bit, this gives e
 928   // remove top bit and shift (rounding if feeling clever) by 9-e
 929
 930   // ucode bug: please don't set bit 14! so 0 rate not representable
 931
 932   if (rate > 0xffc00000U) {
 933     // larger than largest representable rate
 934
 935     if (r == round_up) {
 936         return -EINVAL;
 937     } else {
 938       exp = 31;
 939       man = 511;
 940     }
 941
 942   } else if (rate) {
 943     // representable rate
 944
 945     exp = 31;
 946     man = rate;
 947
 948     // invariant: rate = man*2^(exp-31)
 949     while (!(man & (1<<31))) {
 950       exp = exp - 1;
 951       man = man<<1;
 952     }
 953
 954     // man has top bit set
 955     // rate = (2^31+(man-2^31))*2^(exp-31)
 956     // rate = (1+(man-2^31)/2^31)*2^exp
 957     man = man<<1;
 958     man &= 0xffffffffU; // a nop on 32-bit systems
 959     // rate = (1+man/2^32)*2^exp
 960
 961     // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 962     // time to lose significance... we want m in the range 0 to 2^9-1
 963     // rounding presents a minor problem... we first decide which way
 964     // we are rounding (based on given rounding direction and possibly
 965     // the bits of the mantissa that are to be discarded).
 966
 967     switch (r) {
 968       case round_down: {
 969         // just truncate
 970         man = man>>(32-9);
 971         break;
 972       }
 973       case round_up: {
 974         // check all bits that we are discarding
 975         if (man & (-1>>9)) {
 976           man = (man>>(32-9)) + 1;
 977           if (man == (1<<9)) {
 978             // no need to check for round up outside of range
 979             man = 0;
 980             exp += 1;
 981           }
 982         } else {
 983           man = (man>>(32-9));
 984         }
 985         break;
 986       }
 987       case round_nearest: {
 988         // check msb that we are discarding
 989         if (man & (1<<(32-9-1))) {
 990           man = (man>>(32-9)) + 1;
 991           if (man == (1<<9)) {
 992             // no need to check for round up outside of range
 993             man = 0;
 994             exp += 1;
 995           }
 996         } else {
 997           man = (man>>(32-9));
 998         }
 999         break;
1000       }
1001     }
1002
1003   } else {
1004     // zero rate - not representable
1005
1006     if (r == round_down) {
1007       return -EINVAL;
1008     } else {
1009       exp = 0;
1010       man = 0;
1011     }
1012
1013   }
1014
1015   PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
1016
1017   if (bits)
1018     *bits = /* (1<<14) | */ (exp<<9) | man;
1019
1020   if (actual)
1021     *actual = (exp >= 9)
1022       ? (1 << exp) + (man << (exp-9))
1023       : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1024
1025   return 0;
1026 }
1027
1028 /********** Linux ATM Operations **********/
1029
1030 // some are not yet implemented while others do not make sense for
1031 // this device
1032
1033 /********** Open a VC **********/
1034
1035 static int amb_open (struct atm_vcc * atm_vcc)
1036 {
1037   int error;
1038
1039   struct atm_qos * qos;
1040   struct atm_trafprm * txtp;
1041   struct atm_trafprm * rxtp;
1042   u16 tx_rate_bits;
1043   u16 tx_vc_bits = -1; // hush gcc
1044   u16 tx_frame_bits = -1; // hush gcc
1045
1046   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1047   amb_vcc * vcc;
1048   unsigned char pool = -1; // hush gcc
1049   short vpi = atm_vcc->vpi;
1050   int vci = atm_vcc->vci;
1051
1052   PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1053
1054 #ifdef ATM_VPI_UNSPEC
1055   // UNSPEC is deprecated, remove this code eventually
1056   if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1057     PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1058     return -EINVAL;
1059   }
1060 #endif
1061
1062   if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1063         0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1064     PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1065     return -EINVAL;
1066   }
1067
1068   qos = &atm_vcc->qos;
1069
1070   if (qos->aal != ATM_AAL5) {
1071     PRINTD (DBG_QOS, "AAL not supported");
1072     return -EINVAL;
1073   }
1074
1075   // traffic parameters
1076
1077   PRINTD (DBG_QOS, "TX:");
1078   txtp = &qos->txtp;
1079   if (txtp->traffic_class != ATM_NONE) {
1080     switch (txtp->traffic_class) {
1081       case ATM_UBR: {
1082         // we take "the PCR" as a rate-cap
1083         int pcr = atm_pcr_goal (txtp);
1084         if (!pcr) {
1085           // no rate cap
1086           tx_rate_bits = 0;
1087           tx_vc_bits = TX_UBR;
1088           tx_frame_bits = TX_FRAME_NOTCAP;
1089         } else {
1090           rounding r;
1091           if (pcr < 0) {
1092             r = round_down;
1093             pcr = -pcr;
1094           } else {
1095             r = round_up;
1096           }
1097           error = make_rate (pcr, r, &tx_rate_bits, NULL);
1098           tx_vc_bits = TX_UBR_CAPPED;
1099           tx_frame_bits = TX_FRAME_CAPPED;
1100         }
1101         break;
1102       }
1103 #if 0
1104       case ATM_ABR: {
1105         pcr = atm_pcr_goal (txtp);
1106         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1107         break;
1108       }
1109 #endif
1110       default: {
1111         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1112         PRINTD (DBG_QOS, "request for non-UBR denied");
1113         return -EINVAL;
1114       }
1115     }
1116     PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1117             tx_rate_bits, tx_vc_bits);
1118   }
1119
1120   PRINTD (DBG_QOS, "RX:");
1121   rxtp = &qos->rxtp;
1122   if (rxtp->traffic_class == ATM_NONE) {
1123     // do nothing
1124   } else {
1125     // choose an RX pool (arranged in increasing size)
1126     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1127       if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1128         PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1129                 pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1130         break;
1131       }
1132     if (pool == NUM_RX_POOLS) {
1133       PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1134               "no pool suitable for VC (RX max_sdu %d is too large)",
1135               rxtp->max_sdu);
1136       return -EINVAL;
1137     }
1138
1139     switch (rxtp->traffic_class) {
1140       case ATM_UBR: {
1141         break;
1142       }
1143 #if 0
1144       case ATM_ABR: {
1145         pcr = atm_pcr_goal (rxtp);
1146         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1147         break;
1148       }
1149 #endif
1150       default: {
1151         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1152         PRINTD (DBG_QOS, "request for non-UBR denied");
1153         return -EINVAL;
1154       }
1155     }
1156   }
1157
1158   // get space for our vcc stuff
1159   vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1160   if (!vcc) {
1161     PRINTK (KERN_ERR, "out of memory!");
1162     return -ENOMEM;
1163   }
1164   atm_vcc->dev_data = (void *) vcc;
1165
1166   // no failures beyond this point
1167
1168   // we are not really "immediately before allocating the connection
1169   // identifier in hardware", but it will just have to do!
1170   set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1171
1172   if (txtp->traffic_class != ATM_NONE) {
1173     command cmd;
1174
1175     vcc->tx_frame_bits = tx_frame_bits;
1176
1177     down (&dev->vcc_sf);
1178     if (dev->rxer[vci]) {
1179       // RXer on the channel already, just modify rate...
1180       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1181       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1182       cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1183       while (command_do (dev, &cmd))
1184         schedule();
1185       // ... and TX flags, preserving the RX pool
1186       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1187       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1188       cmd.args.modify_flags.flags = cpu_to_be32
1189         ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1190           | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1191       while (command_do (dev, &cmd))
1192         schedule();
1193     } else {
1194       // no RXer on the channel, just open (with pool zero)
1195       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1196       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1197       cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1198       cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1199       while (command_do (dev, &cmd))
1200         schedule();
1201     }
1202     dev->txer[vci].tx_present = 1;
1203     up (&dev->vcc_sf);
1204   }
1205
1206   if (rxtp->traffic_class != ATM_NONE) {
1207     command cmd;
1208
1209     vcc->rx_info.pool = pool;
1210
1211     down (&dev->vcc_sf);
1212     /* grow RX buffer pool */
1213     if (!dev->rxq[pool].buffers_wanted)
1214       dev->rxq[pool].buffers_wanted = rx_lats;
1215     dev->rxq[pool].buffers_wanted += 1;
1216     fill_rx_pool (dev, pool, GFP_KERNEL);
1217
1218     if (dev->txer[vci].tx_present) {
1219       // TXer on the channel already
1220       // switch (from pool zero) to this pool, preserving the TX bits
1221       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1222       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1223       cmd.args.modify_flags.flags = cpu_to_be32
1224         ( (pool << SRB_POOL_SHIFT)
1225           | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1226     } else {
1227       // no TXer on the channel, open the VC (with no rate info)
1228       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1229       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1230       cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1231       cmd.args.open.rate = cpu_to_be32 (0);
1232     }
1233     while (command_do (dev, &cmd))
1234       schedule();
1235     // this link allows RX frames through
1236     dev->rxer[vci] = atm_vcc;
1237     up (&dev->vcc_sf);
1238   }
1239
1240   // indicate readiness
1241   set_bit(ATM_VF_READY,&atm_vcc->flags);
1242
1243   return 0;
1244 }
1245
1246 /********** Close a VC **********/
1247
1248 static void amb_close (struct atm_vcc * atm_vcc) {
1249   amb_dev * dev = AMB_DEV (atm_vcc->dev);
1250   amb_vcc * vcc = AMB_VCC (atm_vcc);
1251   u16 vci = atm_vcc->vci;
1252
1253   PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1254
1255   // indicate unreadiness
1256   clear_bit(ATM_VF_READY,&atm_vcc->flags);
1257
1258   // disable TXing
1259   if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1260     command cmd;
1261
1262     down (&dev->vcc_sf);
1263     if (dev->rxer[vci]) {
1264       // RXer still on the channel, just modify rate... XXX not really needed
1265       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1266       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1267       cmd.args.modify_rate.rate = cpu_to_be32 (0);
1268       // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1269     } else {
1270       // no RXer on the channel, close channel
1271       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1272       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1273     }
1274     dev->txer[vci].tx_present = 0;
1275     while (command_do (dev, &cmd))
1276       schedule();
1277     up (&dev->vcc_sf);
1278   }
1279
1280   // disable RXing
1281   if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1282     command cmd;
1283
1284     // this is (the?) one reason why we need the amb_vcc struct
1285     unsigned char pool = vcc->rx_info.pool;
1286
1287     down (&dev->vcc_sf);
1288     if (dev->txer[vci].tx_present) {
1289       // TXer still on the channel, just go to pool zero XXX not really needed
1290       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1291       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1292       cmd.args.modify_flags.flags = cpu_to_be32
1293         (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1294     } else {
1295       // no TXer on the channel, close the VC
1296       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1297       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1298     }
1299     // forget the rxer - no more skbs will be pushed
1300     if (atm_vcc != dev->rxer[vci])
1301       PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1302               "arghhh! we're going to die!",
1303               vcc, dev->rxer[vci]);
1304     dev->rxer[vci] = NULL;
1305     while (command_do (dev, &cmd))
1306       schedule();
1307
1308     /* shrink RX buffer pool */
1309     dev->rxq[pool].buffers_wanted -= 1;
1310     if (dev->rxq[pool].buffers_wanted == rx_lats) {
1311       dev->rxq[pool].buffers_wanted = 0;
1312       drain_rx_pool (dev, pool);
1313     }
1314     up (&dev->vcc_sf);
1315   }
1316
1317   // free our structure
1318   kfree (vcc);
1319
1320   // say the VPI/VCI is free again
1321   clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1322
1323   return;
1324 }
1325
/********** Get socket options for a VC **********/
1327
1328 // int amb_getsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1329
1330 /********** Set socket options for a VC **********/
1331
1332 // int amb_setsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1333
1334 /********** Send **********/
1335
1336 static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1337   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1338   amb_vcc * vcc = AMB_VCC(atm_vcc);
1339   u16 vc = atm_vcc->vci;
1340   unsigned int tx_len = skb->len;
1341   unsigned char * tx_data = skb->data;
1342   tx_simple * tx_descr;
1343   tx_in tx;
1344
1345   if (test_bit (dead, &dev->flags))
1346     return -EIO;
1347
1348   PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1349           vc, tx_data, tx_len);
1350
1351   dump_skb (">>>", vc, skb);
1352
1353   if (!dev->txer[vc].tx_present) {
1354     PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1355     return -EBADFD;
1356   }
1357
1358   // this is a driver private field so we have to set it ourselves,
1359   // despite the fact that we are _required_ to use it to check for a
1360   // pop function
1361   ATM_SKB(skb)->vcc = atm_vcc;
1362
1363   if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1364     PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1365     return -EIO;
1366   }
1367
1368   if (check_area (skb->data, skb->len)) {
1369     atomic_inc(&atm_vcc->stats->tx_err);
1370     return -ENOMEM; // ?
1371   }
1372
1373   // allocate memory for fragments
1374   tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1375   if (!tx_descr) {
1376     PRINTK (KERN_ERR, "could not allocate TX descriptor");
1377     return -ENOMEM;
1378   }
1379   if (check_area (tx_descr, sizeof(tx_simple))) {
1380     kfree (tx_descr);
1381     return -ENOMEM;
1382   }
1383   PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1384
1385   tx_descr->skb = skb;
1386
1387   tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1388   tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1389
1390   tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1391   tx_descr->tx_frag_end.vc = 0;
1392   tx_descr->tx_frag_end.next_descriptor_length = 0;
1393   tx_descr->tx_frag_end.next_descriptor = 0;
1394 #ifdef AMB_NEW_MICROCODE
1395   tx_descr->tx_frag_end.cpcs_uu = 0;
1396   tx_descr->tx_frag_end.cpi = 0;
1397   tx_descr->tx_frag_end.pad = 0;
1398 #endif
1399
1400   tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1401   tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1402   tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1403
1404   while (tx_give (dev, &tx))
1405     schedule();
1406   return 0;
1407 }
1408
1409 /********** Change QoS on a VC **********/
1410
1411 // int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1412
1413 /********** Free RX Socket Buffer **********/
1414
1415 #if 0
1416 static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1417   amb_dev * dev = AMB_DEV (atm_vcc->dev);
1418   amb_vcc * vcc = AMB_VCC (atm_vcc);
1419   unsigned char pool = vcc->rx_info.pool;
1420   rx_in rx;
1421
1422   // This may be unsafe for various reasons that I cannot really guess
1423   // at. However, I note that the ATM layer calls kfree_skb rather
1424   // than dev_kfree_skb at this point so we are least covered as far
1425   // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1426
1427   PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
1428           skb, atm_vcc, vcc);
1429
1430   rx.handle = virt_to_bus (skb);
1431   rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
1432
1433   skb->data = skb->head;
1434   skb->tail = skb->head;
1435   skb->len = 0;
1436
1437   if (!rx_give (dev, &rx, pool)) {
1438     // success
1439     PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1440     return;
1441   }
1442
1443   // just do what the ATM layer would have done
1444   dev_kfree_skb_any (skb);
1445
1446   return;
1447 }
1448 #endif
1449
1450 /********** Proc File Output **********/
1451
1452 static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1453   amb_dev * dev = AMB_DEV (atm_dev);
1454   int left = *pos;
1455   unsigned char pool;
1456
1457   PRINTD (DBG_FLOW, "amb_proc_read");
1458
1459   /* more diagnostics here? */
1460
1461   if (!left--) {
1462     amb_stats * s = &dev->stats;
1463     return sprintf (page,
1464                     "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1465                     "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1466                     s->tx_ok, s->rx.ok, s->rx.error,
1467                     s->rx.badcrc, s->rx.toolong,
1468                     s->rx.aborted, s->rx.unused);
1469   }
1470
1471   if (!left--) {
1472     amb_cq * c = &dev->cq;
1473     return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1474                     c->pending, c->high, c->maximum);
1475   }
1476
1477   if (!left--) {
1478     amb_txq * t = &dev->txq;
1479     return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1480                     t->pending, t->maximum, t->high, t->filled);
1481   }
1482
1483   if (!left--) {
1484     unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1485     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1486       amb_rxq * r = &dev->rxq[pool];
1487       count += sprintf (page+count, " %u/%u/%u %u %u",
1488                         r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1489     }
1490     count += sprintf (page+count, ".\n");
1491     return count;
1492   }
1493
1494   if (!left--) {
1495     unsigned int count = sprintf (page, "RX buffer sizes:");
1496     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1497       amb_rxq * r = &dev->rxq[pool];
1498       count += sprintf (page+count, " %u", r->buffer_size);
1499     }
1500     count += sprintf (page+count, ".\n");
1501     return count;
1502   }
1503
1504 #if 0
1505   if (!left--) {
1506     // suni block etc?
1507   }
1508 #endif
1509
1510   return 0;
1511 }
1512
1513 /********** Operation Structure **********/
1514
1515 static const struct atmdev_ops amb_ops = {
1516   .open         = amb_open,
1517   .close        = amb_close,
1518   .send         = amb_send,
1519   .proc_read    = amb_proc_read,
1520   .owner        = THIS_MODULE,
1521 };
1522
1523 /********** housekeeping **********/
1524 static void do_housekeeping (unsigned long arg) {
1525   amb_dev * dev = (amb_dev *) arg;
1526
1527   // could collect device-specific (not driver/atm-linux) stats here
1528
1529   // last resort refill once every ten seconds
1530   fill_rx_pools (dev);
1531   mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1532
1533   return;
1534 }
1535
1536 /********** creation of communication queues **********/
1537
1538 static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1539                                  unsigned int txs, unsigned int * rxs,
1540                                  unsigned int * rx_buffer_sizes) {
1541   unsigned char pool;
1542   size_t total = 0;
1543   void * memory;
1544   void * limit;
1545
1546   PRINTD (DBG_FLOW, "create_queues %p", dev);
1547
1548   total += cmds * sizeof(command);
1549
1550   total += txs * (sizeof(tx_in) + sizeof(tx_out));
1551
1552   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1553     total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1554
1555   memory = kmalloc (total, GFP_KERNEL);
1556   if (!memory) {
1557     PRINTK (KERN_ERR, "could not allocate queues");
1558     return -ENOMEM;
1559   }
1560   if (check_area (memory, total)) {
1561     PRINTK (KERN_ERR, "queues allocated in nasty area");
1562     kfree (memory);
1563     return -ENOMEM;
1564   }
1565
1566   limit = memory + total;
1567   PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1568
1569   PRINTD (DBG_CMD, "command queue at %p", memory);
1570
1571   {
1572     command * cmd = memory;
1573     amb_cq * cq = &dev->cq;
1574
1575     cq->pending = 0;
1576     cq->high = 0;
1577     cq->maximum = cmds - 1;
1578
1579     cq->ptrs.start = cmd;
1580     cq->ptrs.in = cmd;
1581     cq->ptrs.out = cmd;
1582     cq->ptrs.limit = cmd + cmds;
1583
1584     memory = cq->ptrs.limit;
1585   }
1586
1587   PRINTD (DBG_TX, "TX queue pair at %p", memory);
1588
1589   {
1590     tx_in * in = memory;
1591     tx_out * out;
1592     amb_txq * txq = &dev->txq;
1593
1594     txq->pending = 0;
1595     txq->high = 0;
1596     txq->filled = 0;
1597     txq->maximum = txs - 1;
1598
1599     txq->in.start = in;
1600     txq->in.ptr = in;
1601     txq->in.limit = in + txs;
1602
1603     memory = txq->in.limit;
1604     out = memory;
1605
1606     txq->out.start = out;
1607     txq->out.ptr = out;
1608     txq->out.limit = out + txs;
1609
1610     memory = txq->out.limit;
1611   }
1612
1613   PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1614
1615   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1616     rx_in * in = memory;
1617     rx_out * out;
1618     amb_rxq * rxq = &dev->rxq[pool];
1619
1620     rxq->buffer_size = rx_buffer_sizes[pool];
1621     rxq->buffers_wanted = 0;
1622
1623     rxq->pending = 0;
1624     rxq->low = rxs[pool] - 1;
1625     rxq->emptied = 0;
1626     rxq->maximum = rxs[pool] - 1;
1627
1628     rxq->in.start = in;
1629     rxq->in.ptr = in;
1630     rxq->in.limit = in + rxs[pool];
1631
1632     memory = rxq->in.limit;
1633     out = memory;
1634
1635     rxq->out.start = out;
1636     rxq->out.ptr = out;
1637     rxq->out.limit = out + rxs[pool];
1638
1639     memory = rxq->out.limit;
1640   }
1641
1642   if (memory == limit) {
1643     return 0;
1644   } else {
1645     PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1646     kfree (limit - total);
1647     return -ENOMEM;
1648   }
1649
1650 }
1651
1652 /********** destruction of communication queues **********/
1653
1654 static void destroy_queues (amb_dev * dev) {
1655   // all queues assumed empty
1656   void * memory = dev->cq.ptrs.start;
1657   // includes txq.in, txq.out, rxq[].in and rxq[].out
1658
1659   PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1660
1661   PRINTD (DBG_INIT, "freeing queues at %p", memory);
1662   kfree (memory);
1663
1664   return;
1665 }
1666
1667 /********** basic loader commands and error handling **********/
1668 // centisecond timeouts - guessing away here
1669 static unsigned int command_timeouts [] = {
1670         [host_memory_test]     = 15,
1671         [read_adapter_memory]  = 2,
1672         [write_adapter_memory] = 2,
1673         [adapter_start]        = 50,
1674         [get_version_number]   = 10,
1675         [interrupt_host]       = 1,
1676         [flash_erase_sector]   = 1,
1677         [adap_download_block]  = 1,
1678         [adap_erase_flash]     = 1,
1679         [adap_run_in_iram]     = 1,
1680         [adap_end_download]    = 1
1681 };
1682
1683
1684 static unsigned int command_successes [] = {
1685         [host_memory_test]     = COMMAND_PASSED_TEST,
1686         [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1687         [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1688         [adapter_start]        = COMMAND_COMPLETE,
1689         [get_version_number]   = COMMAND_COMPLETE,
1690         [interrupt_host]       = COMMAND_COMPLETE,
1691         [flash_erase_sector]   = COMMAND_COMPLETE,
1692         [adap_download_block]  = COMMAND_COMPLETE,
1693         [adap_erase_flash]     = COMMAND_COMPLETE,
1694         [adap_run_in_iram]     = COMMAND_COMPLETE,
1695         [adap_end_download]    = COMMAND_COMPLETE
1696 };
1697
1698 static  int decode_loader_result (loader_command cmd, u32 result)
1699 {
1700         int res;
1701         const char *msg;
1702
1703         if (result == command_successes[cmd])
1704                 return 0;
1705
1706         switch (result) {
1707                 case BAD_COMMAND:
1708                         res = -EINVAL;
1709                         msg = "bad command";
1710                         break;
1711                 case COMMAND_IN_PROGRESS:
1712                         res = -ETIMEDOUT;
1713                         msg = "command in progress";
1714                         break;
1715                 case COMMAND_PASSED_TEST:
1716                         res = 0;
1717                         msg = "command passed test";
1718                         break;
1719                 case COMMAND_FAILED_TEST:
1720                         res = -EIO;
1721                         msg = "command failed test";
1722                         break;
1723                 case COMMAND_READ_DATA_OK:
1724                         res = 0;
1725                         msg = "command read data ok";
1726                         break;
1727                 case COMMAND_READ_BAD_ADDRESS:
1728                         res = -EINVAL;
1729                         msg = "command read bad address";
1730                         break;
1731                 case COMMAND_WRITE_DATA_OK:
1732                         res = 0;
1733                         msg = "command write data ok";
1734                         break;
1735                 case COMMAND_WRITE_BAD_ADDRESS:
1736                         res = -EINVAL;
1737                         msg = "command write bad address";
1738                         break;
1739                 case COMMAND_WRITE_FLASH_FAILURE:
1740                         res = -EIO;
1741                         msg = "command write flash failure";
1742                         break;
1743                 case COMMAND_COMPLETE:
1744                         res = 0;
1745                         msg = "command complete";
1746                         break;
1747                 case COMMAND_FLASH_ERASE_FAILURE:
1748                         res = -EIO;
1749                         msg = "command flash erase failure";
1750                         break;
1751                 case COMMAND_WRITE_BAD_DATA:
1752                         res = -EINVAL;
1753                         msg = "command write bad data";
1754                         break;
1755                 default:
1756                         res = -EINVAL;
1757                         msg = "unknown error";
1758                         PRINTD (DBG_LOAD|DBG_ERR,
1759                                 "decode_loader_result got %d=%x !",
1760                                 result, result);
1761                         break;
1762         }
1763
1764         PRINTK (KERN_ERR, "%s", msg);
1765         return res;
1766 }
1767
1768 static int __devinit do_loader_command (volatile loader_block * lb,
1769                                      const amb_dev * dev, loader_command cmd) {
1770
1771   unsigned long timeout;
1772
1773   PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1774
1775   /* do a command
1776
1777      Set the return value to zero, set the command type and set the
1778      valid entry to the right magic value. The payload is already
1779      correctly byte-ordered so we leave it alone. Hit the doorbell
1780      with the bus address of this structure.
1781
1782   */
1783
1784   lb->result = 0;
1785   lb->command = cpu_to_be32 (cmd);
1786   lb->valid = cpu_to_be32 (DMA_VALID);
1787   // dump_registers (dev);
1788   // dump_loader_block (lb);
1789   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1790
1791   timeout = command_timeouts[cmd] * 10;
1792
1793   while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1794     if (timeout) {
1795       timeout = msleep_interruptible(timeout);
1796     } else {
1797       PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1798       dump_registers (dev);
1799       dump_loader_block (lb);
1800       return -ETIMEDOUT;
1801     }
1802
1803   if (cmd == adapter_start) {
1804     // wait for start command to acknowledge...
1805     timeout = 100;
1806     while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1807       if (timeout) {
1808         timeout = msleep_interruptible(timeout);
1809       } else {
1810         PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1811                 be32_to_cpu (lb->result));
1812         dump_registers (dev);
1813         return -ETIMEDOUT;
1814       }
1815     return 0;
1816   } else {
1817     return decode_loader_result (cmd, be32_to_cpu (lb->result));
1818   }
1819
1820 }
1821
1822 /* loader: determine loader version */
1823
1824 static int __devinit get_loader_version (loader_block * lb,
1825                                       const amb_dev * dev, u32 * version) {
1826   int res;
1827
1828   PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1829
1830   res = do_loader_command (lb, dev, get_version_number);
1831   if (res)
1832     return res;
1833   if (version)
1834     *version = be32_to_cpu (lb->payload.version);
1835   return 0;
1836 }
1837
1838 /* loader: write memory data blocks */
1839
1840 static int __devinit loader_write (loader_block * lb,
1841                                 const amb_dev * dev, const u32 * data,
1842                                 u32 address, unsigned int count) {
1843   unsigned int i;
1844   transfer_block * tb = &lb->payload.transfer;
1845
1846   PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1847
1848   if (count > MAX_TRANSFER_DATA)
1849     return -EINVAL;
1850   tb->address = cpu_to_be32 (address);
1851   tb->count = cpu_to_be32 (count);
1852   for (i = 0; i < count; ++i)
1853     tb->data[i] = cpu_to_be32 (data[i]);
1854   return do_loader_command (lb, dev, write_adapter_memory);
1855 }
1856
1857 /* loader: verify memory data blocks */
1858
1859 static int __devinit loader_verify (loader_block * lb,
1860                                  const amb_dev * dev, const u32 * data,
1861                                  u32 address, unsigned int count) {
1862   unsigned int i;
1863   transfer_block * tb = &lb->payload.transfer;
1864   int res;
1865
1866   PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1867
1868   if (count > MAX_TRANSFER_DATA)
1869     return -EINVAL;
1870   tb->address = cpu_to_be32 (address);
1871   tb->count = cpu_to_be32 (count);
1872   res = do_loader_command (lb, dev, read_adapter_memory);
1873   if (!res)
1874     for (i = 0; i < count; ++i)
1875       if (tb->data[i] != cpu_to_be32 (data[i])) {
1876         res = -EINVAL;
1877         break;
1878       }
1879   return res;
1880 }
1881
1882 /* loader: start microcode */
1883
1884 static int __devinit loader_start (loader_block * lb,
1885                                 const amb_dev * dev, u32 address) {
1886   PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1887
1888   lb->payload.start = cpu_to_be32 (address);
1889   return do_loader_command (lb, dev, adapter_start);
1890 }
1891
1892 /********** reset card **********/
1893
1894 static inline void sf (const char * msg)
1895 {
1896         PRINTK (KERN_ERR, "self-test failed: %s", msg);
1897 }
1898
1899 static int amb_reset (amb_dev * dev, int diags) {
1900   u32 word;
1901
1902   PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1903
1904   word = rd_plain (dev, offsetof(amb_mem, reset_control));
1905   // put card into reset state
1906   wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1907   // wait a short while
1908   udelay (10);
1909 #if 1
1910   // put card into known good state
1911   wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1912   // clear all interrupts just in case
1913   wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1914 #endif
1915   // clear self-test done flag
1916   wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1917   // take card out of reset state
1918   wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1919
1920   if (diags) {
1921     unsigned long timeout;
1922     // 4.2 second wait
1923     msleep(4200);
1924     // half second time-out
1925     timeout = 500;
1926     while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1927       if (timeout) {
1928         timeout = msleep_interruptible(timeout);
1929       } else {
1930         PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1931         return -ETIMEDOUT;
1932       }
1933
1934     // get results of self-test
1935     // XXX double check byte-order
1936     word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1937     if (word & SELF_TEST_FAILURE) {
1938       if (word & GPINT_TST_FAILURE)
1939         sf ("interrupt");
1940       if (word & SUNI_DATA_PATTERN_FAILURE)
1941         sf ("SUNI data pattern");
1942       if (word & SUNI_DATA_BITS_FAILURE)
1943         sf ("SUNI data bits");
1944       if (word & SUNI_UTOPIA_FAILURE)
1945         sf ("SUNI UTOPIA interface");
1946       if (word & SUNI_FIFO_FAILURE)
1947         sf ("SUNI cell buffer FIFO");
1948       if (word & SRAM_FAILURE)
1949         sf ("bad SRAM");
1950       // better return value?
1951       return -EIO;
1952     }
1953
1954   }
1955   return 0;
1956 }
1957
1958 /********** transfer and start the microcode **********/
1959
1960 static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1961   unsigned int i = 0;
1962   unsigned int total = 0;
1963   const u32 * pointer = ucode_data;
1964   u32 address;
1965   unsigned int count;
1966   int res;
1967
1968   PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1969
1970   while (address = ucode_regions[i].start,
1971          count = ucode_regions[i].count) {
1972     PRINTD (DBG_LOAD, "starting region (%x, %u)", address, count);
1973     while (count) {
1974       unsigned int words;
1975       if (count <= MAX_TRANSFER_DATA)
1976         words = count;
1977       else
1978         words = MAX_TRANSFER_DATA;
1979       total += words;
1980       res = loader_write (lb, dev, pointer, address, words);
1981       if (res)
1982         return res;
1983       res = loader_verify (lb, dev, pointer, address, words);
1984       if (res)
1985         return res;
1986       count -= words;
1987       address += sizeof(u32) * words;
1988       pointer += words;
1989     }
1990     i += 1;
1991   }
1992   if (*pointer == ATM_POISON) {
1993     return loader_start (lb, dev, ucode_start);
1994   } else {
1995     // cast needed as there is no %? for pointer differnces
1996     PRINTD (DBG_LOAD|DBG_ERR,
1997             "offset=%li, *pointer=%x, address=%x, total=%u",
1998             (long) (pointer - ucode_data), *pointer, address, total);
1999     PRINTK (KERN_ERR, "incorrect microcode data");
2000     return -ENOMEM;
2001   }
2002 }
2003
2004 /********** give adapter parameters **********/
2005
2006 static inline __be32 bus_addr(void * addr) {
2007     return cpu_to_be32 (virt_to_bus (addr));
2008 }
2009
2010 static int __devinit amb_talk (amb_dev * dev) {
2011   adap_talk_block a;
2012   unsigned char pool;
2013   unsigned long timeout;
2014
2015   PRINTD (DBG_FLOW, "amb_talk %p", dev);
2016
2017   a.command_start = bus_addr (dev->cq.ptrs.start);
2018   a.command_end   = bus_addr (dev->cq.ptrs.limit);
2019   a.tx_start      = bus_addr (dev->txq.in.start);
2020   a.tx_end        = bus_addr (dev->txq.in.limit);
2021   a.txcom_start   = bus_addr (dev->txq.out.start);
2022   a.txcom_end     = bus_addr (dev->txq.out.limit);
2023
2024   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2025     // the other "a" items are set up by the adapter
2026     a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2027     a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2028     a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2029     a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2030     a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2031   }
2032
2033 #ifdef AMB_NEW_MICROCODE
2034   // disable fast PLX prefetching
2035   a.init_flags = 0;
2036 #endif
2037
2038   // pass the structure
2039   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2040
2041   // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2042   msleep(2200);
2043   // give the adapter another half second?
2044   timeout = 500;
2045   while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2046     if (timeout) {
2047       timeout = msleep_interruptible(timeout);
2048     } else {
2049       PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2050       return -ETIMEDOUT;
2051     }
2052
2053   return 0;
2054 }
2055
2056 // get microcode version
2057 static void __devinit amb_ucode_version (amb_dev * dev) {
2058   u32 major;
2059   u32 minor;
2060   command cmd;
2061   cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2062   while (command_do (dev, &cmd)) {
2063     set_current_state(TASK_UNINTERRUPTIBLE);
2064     schedule();
2065   }
2066   major = be32_to_cpu (cmd.args.version.major);
2067   minor = be32_to_cpu (cmd.args.version.minor);
2068   PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2069 }
2070
// swap bits within byte to get Ethernet ordering
//
// Returns byte with the order of its eight bits reversed
// (bit 0 <-> bit 7, bit 1 <-> bit 6, and so on).
static u8 bit_swap (u8 byte)
{
    // nibble_rev[n] is the 4-bit value n with its bit order reversed;
    // static so that the table is not rebuilt on the stack on every call
    static const u8 nibble_rev[16] = {
      0x0, 0x8, 0x4, 0xc,
      0x2, 0xa, 0x6, 0xe,
      0x1, 0x9, 0x5, 0xd,
      0x3, 0xb, 0x7, 0xf
    };

    // the reversed low nibble becomes the high nibble and vice versa
    return ((nibble_rev[byte & 0xf]<<4) | nibble_rev[byte>>4]);
}
2082
2083 // get end station address
2084 static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2085   u32 lower4;
2086   u16 upper2;
2087   command cmd;
2088
2089   cmd.request = cpu_to_be32 (SRB_GET_BIA);
2090   while (command_do (dev, &cmd)) {
2091     set_current_state(TASK_UNINTERRUPTIBLE);
2092     schedule();
2093   }
2094   lower4 = be32_to_cpu (cmd.args.bia.lower4);
2095   upper2 = be32_to_cpu (cmd.args.bia.upper2);
2096   PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2097
2098   if (esi) {
2099     unsigned int i;
2100
2101     PRINTDB (DBG_INIT, "ESI:");
2102     for (i = 0; i < ESI_LEN; ++i) {
2103       if (i < 4)
2104           esi[i] = bit_swap (lower4>>(8*i));
2105       else
2106           esi[i] = bit_swap (upper2>>(8*(i-4)));
2107       PRINTDM (DBG_INIT, " %02x", esi[i]);
2108     }
2109
2110     PRINTDE (DBG_INIT, "");
2111   }
2112
2113   return;
2114 }
2115
2116 static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2117 {
2118         // fix up the PLX-mapped window base address to match the block
2119         unsigned long blb;
2120         u32 mapreg;
2121         blb = virt_to_bus(lb);
2122         // the kernel stack had better not ever cross a 1Gb boundary!
2123         mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2124         mapreg &= ~onegigmask;
2125         mapreg |= blb & onegigmask;
2126         wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2127         return;
2128 }
2129
2130 static int __devinit amb_init (amb_dev * dev)
2131 {
2132   loader_block lb;
2133
2134   u32 version;
2135
2136   if (amb_reset (dev, 1)) {
2137     PRINTK (KERN_ERR, "card reset failed!");
2138   } else {
2139     fixup_plx_window (dev, &lb);
2140
2141     if (get_loader_version (&lb, dev, &version)) {
2142       PRINTK (KERN_INFO, "failed to get loader version");
2143     } else {
2144       PRINTK (KERN_INFO, "loader version is %08x", version);
2145
2146       if (ucode_init (&lb, dev)) {
2147         PRINTK (KERN_ERR, "microcode failure");
2148       } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2149         PRINTK (KERN_ERR, "failed to get memory for queues");
2150       } else {
2151
2152         if (amb_talk (dev)) {
2153           PRINTK (KERN_ERR, "adapter did not accept queues");
2154         } else {
2155
2156           amb_ucode_version (dev);
2157           return 0;
2158
2159         } /* amb_talk */
2160
2161         destroy_queues (dev);
2162       } /* create_queues, ucode_init */
2163
2164       amb_reset (dev, 0);
2165     } /* get_loader_version */
2166
2167   } /* amb_reset */
2168
2169   return -EINVAL;
2170 }
2171
2172 static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
2173 {
2174       unsigned char pool;
2175       memset (dev, 0, sizeof(amb_dev));
2176
2177       // set up known dev items straight away
2178       dev->pci_dev = pci_dev;
2179       pci_set_drvdata(pci_dev, dev);
2180
2181       dev->iobase = pci_resource_start (pci_dev, 1);
2182       dev->irq = pci_dev->irq;
2183       dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2184
2185       // flags (currently only dead)
2186       dev->flags = 0;
2187
2188       // Allocate cell rates (fibre)
2189       // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2190       // to be really pedantic, this should be ATM_OC3c_PCR
2191       dev->tx_avail = ATM_OC3_PCR;
2192       dev->rx_avail = ATM_OC3_PCR;
2193
2194 #ifdef FILL_RX_POOLS_IN_BH
2195       // initialise bottom half
2196       INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2197 #endif
2198
2199       // semaphore for txer/rxer modifications - we cannot use a
2200       // spinlock as the critical region needs to switch processes
2201       init_MUTEX (&dev->vcc_sf);
2202       // queue manipulation spinlocks; we want atomic reads and
2203       // writes to the queue descriptors (handles IRQ and SMP)
2204       // consider replacing "int pending" -> "atomic_t available"
2205       // => problem related to who gets to move queue pointers
2206       spin_lock_init (&dev->cq.lock);
2207       spin_lock_init (&dev->txq.lock);
2208       for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2209         spin_lock_init (&dev->rxq[pool].lock);
2210 }
2211
2212 static void setup_pci_dev(struct pci_dev *pci_dev)
2213 {
2214         unsigned char lat;
2215
2216         // enable bus master accesses
2217         pci_set_master(pci_dev);
2218
2219         // frobnicate latency (upwards, usually)
2220         pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2221
2222         if (!pci_lat)
2223                 pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2224
2225         if (lat != pci_lat) {
2226                 PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2227                         lat, pci_lat);
2228                 pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2229         }
2230 }
2231
2232 static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2233 {
2234         amb_dev * dev;
2235         int err;
2236         unsigned int irq;
2237
2238         err = pci_enable_device(pci_dev);
2239         if (err < 0) {
2240                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2241                 goto out;
2242         }
2243
2244         // read resources from PCI configuration space
2245         irq = pci_dev->irq;
2246
2247         if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2248                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2249                 err = -EINVAL;
2250                 goto out_disable;
2251         }
2252
2253         PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2254                 " IO %llx, IRQ %u, MEM %p",
2255                 (unsigned long long)pci_resource_start(pci_dev, 1),
2256                 irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2257
2258         // check IO region
2259         err = pci_request_region(pci_dev, 1, DEV_LABEL);
2260         if (err < 0) {
2261                 PRINTK (KERN_ERR, "IO range already in use!");
2262                 goto out_disable;
2263         }
2264
2265         dev = kmalloc (sizeof(amb_dev), GFP_KERNEL);
2266         if (!dev) {
2267                 PRINTK (KERN_ERR, "out of memory!");
2268                 err = -ENOMEM;
2269                 goto out_release;
2270         }
2271
2272         setup_dev(dev, pci_dev);
2273
2274         err = amb_init(dev);
2275         if (err < 0) {
2276                 PRINTK (KERN_ERR, "adapter initialisation failure");
2277                 goto out_free;
2278         }
2279
2280         setup_pci_dev(pci_dev);
2281
2282         // grab (but share) IRQ and install handler
2283         err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2284         if (err < 0) {
2285                 PRINTK (KERN_ERR, "request IRQ failed!");
2286                 goto out_reset;
2287         }
2288
2289         dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
2290         if (!dev->atm_dev) {
2291                 PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2292                 err = -EINVAL;
2293                 goto out_free_irq;
2294         }
2295
2296         PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2297                 dev->atm_dev->number, dev, dev->atm_dev);
2298                 dev->atm_dev->dev_data = (void *) dev;
2299
2300         // register our address
2301         amb_esi (dev, dev->atm_dev->esi);
2302
2303         // 0 bits for vpi, 10 bits for vci
2304         dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2305         dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2306
2307         init_timer(&dev->housekeeping);
2308         dev->housekeeping.function = do_housekeeping;
2309         dev->housekeeping.data = (unsigned long) dev;
2310         mod_timer(&dev->housekeeping, jiffies);
2311
2312         // enable host interrupts
2313         interrupts_on (dev);
2314
2315 out:
2316         return err;
2317
2318 out_free_irq:
2319         free_irq(irq, dev);
2320 out_reset:
2321         amb_reset(dev, 0);
2322 out_free:
2323         kfree(dev);
2324 out_release:
2325         pci_release_region(pci_dev, 1);
2326 out_disable:
2327         pci_disable_device(pci_dev);
2328         goto out;
2329 }
2330
2331
2332 static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2333 {
2334         struct amb_dev *dev;
2335
2336         dev = pci_get_drvdata(pci_dev);
2337
2338         PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2339         del_timer_sync(&dev->housekeeping);
2340         // the drain should not be necessary
2341         drain_rx_pools(dev);
2342         interrupts_off(dev);
2343         amb_reset(dev, 0);
2344         free_irq(dev->irq, dev);
2345         pci_disable_device(pci_dev);
2346         destroy_queues(dev);
2347         atm_dev_deregister(dev->atm_dev);
2348         kfree(dev);
2349         pci_release_region(pci_dev, 1);
2350 }
2351
2352 static void __init amb_check_args (void) {
2353   unsigned char pool;
2354   unsigned int max_rx_size;
2355
2356 #ifdef DEBUG_AMBASSADOR
2357   PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2358 #else
2359   if (debug)
2360     PRINTK (KERN_NOTICE, "no debugging support");
2361 #endif
2362
2363   if (cmds < MIN_QUEUE_SIZE)
2364     PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2365             cmds = MIN_QUEUE_SIZE);
2366
2367   if (txs < MIN_QUEUE_SIZE)
2368     PRINTK (KERN_NOTICE, "txs has been raised to %u",
2369             txs = MIN_QUEUE_SIZE);
2370
2371   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2372     if (rxs[pool] < MIN_QUEUE_SIZE)
2373       PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2374               pool, rxs[pool] = MIN_QUEUE_SIZE);
2375
2376   // buffers sizes should be greater than zero and strictly increasing
2377   max_rx_size = 0;
2378   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2379     if (rxs_bs[pool] <= max_rx_size)
2380       PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2381               pool, rxs_bs[pool]);
2382     else
2383       max_rx_size = rxs_bs[pool];
2384
2385   if (rx_lats < MIN_RX_BUFFERS)
2386     PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2387             rx_lats = MIN_RX_BUFFERS);
2388
2389   return;
2390 }
2391
2392 /********** module stuff **********/
2393
2394 MODULE_AUTHOR(maintainer_string);
2395 MODULE_DESCRIPTION(description_string);
2396 MODULE_LICENSE("GPL");
2397 module_param(debug,   ushort, 0644);
2398 module_param(cmds,    uint, 0);
2399 module_param(txs,     uint, 0);
2400 module_param_array(rxs,     uint, NULL, 0);
2401 module_param_array(rxs_bs,  uint, NULL, 0);
2402 module_param(rx_lats, uint, 0);
2403 module_param(pci_lat, byte, 0);
2404 MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2405 MODULE_PARM_DESC(cmds,    "number of command queue entries");
2406 MODULE_PARM_DESC(txs,     "number of TX queue entries");
2407 MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2408 MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2409 MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2410 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2411
2412 /********** module entry **********/
2413
2414 static struct pci_device_id amb_pci_tbl[] = {
2415         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR, PCI_ANY_ID, PCI_ANY_ID,
2416           0, 0, 0 },
2417         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD, PCI_ANY_ID, PCI_ANY_ID,
2418           0, 0, 0 },
2419         { 0, }
2420 };
2421
2422 MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2423
2424 static struct pci_driver amb_driver = {
2425         .name =         "amb",
2426         .probe =        amb_probe,
2427         .remove =       __devexit_p(amb_remove_one),
2428         .id_table =     amb_pci_tbl,
2429 };
2430
2431 static int __init amb_module_init (void)
2432 {
2433   PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2434
2435   // sanity check - cast needed as printk does not support %Zu
2436   if (sizeof(amb_mem) != 4*16 + 4*12) {
2437     PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2438             (unsigned long) sizeof(amb_mem));
2439     return -ENOMEM;
2440   }
2441
2442   show_version();
2443
2444   amb_check_args();
2445
2446   // get the juice
2447   return pci_register_driver(&amb_driver);
2448 }
2449
2450 /********** module exit **********/
2451
2452 static void __exit amb_module_exit (void)
2453 {
2454   PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2455
2456   return pci_unregister_driver(&amb_driver);
2457 }
2458
2459 module_init(amb_module_init);
2460 module_exit(amb_module_exit);