git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'dmaengine' into async-tx-next
author Dan Williams <dan.j.williams@intel.com>
Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
Conflicts:
crypto/async_tx/async_xor.c
drivers/dma/ioat/dma_v2.h
drivers/dma/ioat/pci.c
drivers/md/raid5.c

15 files changed:
1  2 
arch/arm/include/asm/hardware/iop_adma.h
drivers/dma/Kconfig
drivers/dma/dmaengine.c
drivers/dma/dmatest.c
drivers/dma/ioat/dma.c
drivers/dma/ioat/dma.h
drivers/dma/ioat/dma_v2.c
drivers/dma/ioat/dma_v2.h
drivers/dma/ioat/pci.c
drivers/dma/iop-adma.c
drivers/md/Kconfig
drivers/md/raid5.c
drivers/md/raid5.h
include/linux/dmaengine.h
include/linux/pci_ids.h

diff --combined arch/arm/include/asm/hardware/iop_adma.h
index bbe8a0475cadb24410a73ae62b8f84cf71154cc8,95dc133d0a7f91d1fbc08ba98d6bdd75c2f2bd0d..59b8c3892f76731608b346d0d100910e9047c481
@@@ -86,6 -86,7 +86,7 @@@ struct iop_adma_chan 
   * @idx: pool index
   * @unmap_src_cnt: number of xor sources
   * @unmap_len: transaction bytecount
+  * @tx_list: list of descriptors that are associated with one operation
   * @async_tx: support for the async_tx api
   * @group_list: list of slots that make up a multi-descriptor transaction
   *    for example transfer lengths larger than the supported hw max
@@@ -102,11 -103,11 +103,12 @@@ struct iop_adma_desc_slot 
        u16 idx;
        u16 unmap_src_cnt;
        size_t unmap_len;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        union {
                u32 *xor_check_result;
                u32 *crc32_result;
 +              u32 *pq_check_result;
        };
  };
  
diff --combined drivers/dma/Kconfig
index ddcd9793b25c2e4c2d612e0c27cc52ab596ba6b3,81e1020fb5148a75677aec191e974c554031d03f..fe1f3717b1ffcacffb12af5a9a75aa53df446fd2
@@@ -17,15 -17,11 +17,15 @@@ if DMADEVICE
  
  comment "DMA Devices"
  
 +config ASYNC_TX_DISABLE_CHANNEL_SWITCH
 +      bool
 +
  config INTEL_IOATDMA
        tristate "Intel I/OAT DMA support"
        depends on PCI && X86
        select DMA_ENGINE
        select DCA
 +      select ASYNC_TX_DISABLE_CHANNEL_SWITCH
        help
          Enable support for the Intel(R) I/OAT DMA engine present
          in recent Intel Xeon chipsets.
@@@ -50,6 -46,14 +50,14 @@@ config DW_DMA
          Support the Synopsys DesignWare AHB DMA controller.  This
          can be integrated in chips such as the Atmel AT32ap7000.
  
+ config AT_HDMAC
+       tristate "Atmel AHB DMA support"
+       depends on ARCH_AT91SAM9RL
+       select DMA_ENGINE
+       help
+         Support the Atmel AHB DMA controller.  This can be integrated in
+         chips such as the Atmel AT91SAM9RL.
  config FSL_DMA
        tristate "Freescale Elo and Elo Plus DMA support"
        depends on FSL_SOC
@@@ -85,6 -89,14 +93,14 @@@ config MX3_IPU_IRQ
          To avoid bloating the irq_desc[] array we allocate a sufficient
          number of IRQ slots and map them dynamically to specific sources.
  
+ config TXX9_DMAC
+       tristate "Toshiba TXx9 SoC DMA support"
+       depends on MACH_TX49XX || MACH_TX39XX
+       select DMA_ENGINE
+       help
+         Support the TXx9 SoC internal DMA controller.  This can be
+         integrated in chips such as the Toshiba TX4927/38/39.
  config DMA_ENGINE
        bool
  
@@@ -104,7 -116,7 +120,7 @@@ config NET_DM
  
  config ASYNC_TX_DMA
        bool "Async_tx: Offload support for the async_tx api"
-       depends on DMA_ENGINE
+       depends on DMA_ENGINE && !HIGHMEM64G
        help
          This allows the async_tx api to take advantage of offload engines for
          memcpy, memset, xor, and raid6 p+q operations.  If your platform has
diff --combined drivers/dma/dmaengine.c
index d5bc628d207cf29894a5d685e1d3df284b9788f6,562d182eae660178bf6155998d61b4c78b3c32b5..bd0b248de2cfabc28f1fecd63e5d64ed61236844
@@@ -608,40 -608,6 +608,40 @@@ void dmaengine_put(void
  }
  EXPORT_SYMBOL(dmaengine_put);
  
 +static bool device_has_all_tx_types(struct dma_device *device)
 +{
 +      /* A device that satisfies this test has channels that will never cause
 +       * an async_tx channel switch event as all possible operation types can
 +       * be handled.
 +       */
 +      #ifdef CONFIG_ASYNC_TX_DMA
 +      if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
 +              return false;
 +      #endif
 +
 +      #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
 +      if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
 +              return false;
 +      #endif
 +
 +      #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
 +      if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
 +              return false;
 +      #endif
 +
 +      #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
 +      if (!dma_has_cap(DMA_XOR, device->cap_mask))
 +              return false;
 +      #endif
 +
 +      #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
 +      if (!dma_has_cap(DMA_PQ, device->cap_mask))
 +              return false;
 +      #endif
 +
 +      return true;
 +}
 +
  static int get_dma_id(struct dma_device *device)
  {
        int rc;
@@@ -678,12 -644,8 +678,12 @@@ int dma_async_device_register(struct dm
                !device->device_prep_dma_memcpy);
        BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
                !device->device_prep_dma_xor);
 -      BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
 -              !device->device_prep_dma_zero_sum);
 +      BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
 +              !device->device_prep_dma_xor_val);
 +      BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
 +              !device->device_prep_dma_pq);
 +      BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
 +              !device->device_prep_dma_pq_val);
        BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
                !device->device_prep_dma_memset);
        BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
        BUG_ON(!device->device_issue_pending);
        BUG_ON(!device->dev);
  
 +      /* note: this only matters in the
 +       * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
 +       */
 +      if (device_has_all_tx_types(device))
 +              dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
 +
        idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
        if (!idr_ref)
                return -ENOMEM;
@@@ -977,30 -933,54 +977,29 @@@ void dma_async_tx_descriptor_init(struc
  {
        tx->chan = chan;
        spin_lock_init(&tx->lock);
-       INIT_LIST_HEAD(&tx->tx_list);
  }
  EXPORT_SYMBOL(dma_async_tx_descriptor_init);
  
  /* dma_wait_for_async_tx - spin wait for a transaction to complete
   * @tx: in-flight transaction to wait on
 - *
 - * This routine assumes that tx was obtained from a call to async_memcpy,
 - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
 - * and submitted).  Walking the parent chain is only meant to cover for DMA
 - * drivers that do not implement the DMA_INTERRUPT capability and may race with
 - * the driver's descriptor cleanup routine.
   */
  enum dma_status
  dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
  {
 -      enum dma_status status;
 -      struct dma_async_tx_descriptor *iter;
 -      struct dma_async_tx_descriptor *parent;
 +      unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
  
        if (!tx)
                return DMA_SUCCESS;
  
 -      WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
 -                " %s\n", __func__, dma_chan_name(tx->chan));
 -
 -      /* poll through the dependency chain, return when tx is complete */
 -      do {
 -              iter = tx;
 -
 -              /* find the root of the unsubmitted dependency chain */
 -              do {
 -                      parent = iter->parent;
 -                      if (!parent)
 -                              break;
 -                      else
 -                              iter = parent;
 -              } while (parent);
 -
 -              /* there is a small window for ->parent == NULL and
 -               * ->cookie == -EBUSY
 -               */
 -              while (iter->cookie == -EBUSY)
 -                      cpu_relax();
 -
 -              status = dma_sync_wait(iter->chan, iter->cookie);
 -      } while (status == DMA_IN_PROGRESS || (iter != tx));
 -
 -      return status;
 +      while (tx->cookie == -EBUSY) {
 +              if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
 +                      pr_err("%s timeout waiting for descriptor submission\n",
 +                              __func__);
 +                      return DMA_ERROR;
 +              }
 +              cpu_relax();
 +      }
 +      return dma_sync_wait(tx->chan, tx->cookie);
  }
  EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
  
diff --combined drivers/dma/dmatest.c
index a3722a7384b5048a938d52215498218fffaf4bb0,d93017fc7872d1f0fe3904ff85870ff8aee1156a..a32a4cf7b1e049ab537db5c065f4f6a474dca2d2
@@@ -38,16 -38,16 +38,21 @@@ module_param(max_channels, uint, S_IRUG
  MODULE_PARM_DESC(max_channels,
                "Maximum number of channels to use (default: all)");
  
+ static unsigned int iterations;
+ module_param(iterations, uint, S_IRUGO);
+ MODULE_PARM_DESC(iterations,
+               "Iterations before stopping test (default: infinite)");
  static unsigned int xor_sources = 3;
  module_param(xor_sources, uint, S_IRUGO);
  MODULE_PARM_DESC(xor_sources,
                "Number of xor source buffers (default: 3)");
  
 +static unsigned int pq_sources = 3;
 +module_param(pq_sources, uint, S_IRUGO);
 +MODULE_PARM_DESC(pq_sources,
 +              "Number of p+q source buffers (default: 3)");
 +
  /*
   * Initialization patterns. All bytes in the source buffer has bit 7
   * set, all bytes in the destination buffer has bit 7 cleared.
@@@ -119,7 -119,7 +124,7 @@@ static void dmatest_init_srcs(u8 **bufs
                        buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
                for ( ; i < start + len; i++)
                        buf[i] = PATTERN_SRC | PATTERN_COPY
-                               | (~i & PATTERN_COUNT_MASK);;
+                               | (~i & PATTERN_COUNT_MASK);
                for ( ; i < test_buf_size; i++)
                        buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
                buf++;
@@@ -232,7 -232,6 +237,7 @@@ static int dmatest_func(void *data
        dma_cookie_t            cookie;
        enum dma_status         status;
        enum dma_ctrl_flags     flags;
 +      u8                      pq_coefs[pq_sources];
        int                     ret;
        int                     src_cnt;
        int                     dst_cnt;
        else if (thread->type == DMA_XOR) {
                src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
                dst_cnt = 1;
 +      } else if (thread->type == DMA_PQ) {
 +              src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
 +              dst_cnt = 2;
 +              for (i = 0; i < pq_sources; i++)
 +                      pq_coefs[i] = 1;
        } else
                goto err_srcs;
  
  
        flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
  
-       while (!kthread_should_stop()) {
+       while (!kthread_should_stop()
+              && !(iterations && total_tests >= iterations)) {
                struct dma_device *dev = chan->device;
                struct dma_async_tx_descriptor *tx = NULL;
                dma_addr_t dma_srcs[src_cnt];
                dma_addr_t dma_dsts[dst_cnt];
                struct completion cmp;
                unsigned long tmo = msecs_to_jiffies(3000);
 +              u8 align = 0;
  
                total_tests++;
  
                src_off = dmatest_random() % (test_buf_size - len + 1);
                dst_off = dmatest_random() % (test_buf_size - len + 1);
  
 +              /* honor alignment restrictions */
 +              if (thread->type == DMA_MEMCPY)
 +                      align = dev->copy_align;
 +              else if (thread->type == DMA_XOR)
 +                      align = dev->xor_align;
 +              else if (thread->type == DMA_PQ)
 +                      align = dev->pq_align;
 +
 +              len = (len >> align) << align;
 +              src_off = (src_off >> align) << align;
 +              dst_off = (dst_off >> align) << align;
 +
                dmatest_init_srcs(thread->srcs, src_off, len);
                dmatest_init_dsts(thread->dsts, dst_off, len);
  
                                                     DMA_BIDIRECTIONAL);
                }
  
 +
                if (thread->type == DMA_MEMCPY)
                        tx = dev->device_prep_dma_memcpy(chan,
                                                         dma_dsts[0] + dst_off,
                                                      dma_dsts[0] + dst_off,
                                                      dma_srcs, xor_sources,
                                                      len, flags);
 +              else if (thread->type == DMA_PQ) {
 +                      dma_addr_t dma_pq[dst_cnt];
 +
 +                      for (i = 0; i < dst_cnt; i++)
 +                              dma_pq[i] = dma_dsts[i] + dst_off;
 +                      tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
 +                                                   pq_sources, pq_coefs,
 +                                                   len, flags);
 +              }
  
                if (!tx) {
                        for (i = 0; i < src_cnt; i++)
@@@ -450,6 -422,13 +456,13 @@@ err_srcbuf
  err_srcs:
        pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
                        thread_name, total_tests, failed_tests, ret);
+       if (iterations > 0)
+               while (!kthread_should_stop()) {
+                       DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit);
+                       interruptible_sleep_on(&wait_dmatest_exit);
+               }
        return ret;
  }
  
@@@ -480,8 -459,6 +493,8 @@@ static int dmatest_add_threads(struct d
                op = "copy";
        else if (type == DMA_XOR)
                op = "xor";
 +      else if (type == DMA_PQ)
 +              op = "pq";
        else
                return -EINVAL;
  
@@@ -531,16 -508,12 +544,16 @@@ static int dmatest_add_channel(struct d
  
        if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
                cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
-               thread_count += cnt > 0 ?: 0;
+               thread_count += cnt > 0 ? cnt : 0;
        }
        if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
                cnt = dmatest_add_threads(dtc, DMA_XOR);
-               thread_count += cnt > 0 ?: 0;
+               thread_count += cnt > 0 ? cnt : 0;
        }
 +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
 +              cnt = dmatest_add_threads(dtc, DMA_PQ);
 +              thread_count += cnt > 0 ?: 0;
 +      }
  
        pr_info("dmatest: Started %u threads using %s\n",
                thread_count, dma_chan_name(chan));
diff --combined drivers/dma/ioat/dma.c
index 32a757be75c1896e47e6c913641b1a344ba46872,21527b89590cce79295e5aa3f6eccc9c735522d9..c524d36d3c2e199db83e5b2a9a8a7b7bf1939a21
@@@ -251,19 -251,18 +251,19 @@@ static dma_cookie_t ioat1_tx_submit(str
        dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
  
        /* write address into NextDescriptor field of last desc in chain */
-       first = to_ioat_desc(tx->tx_list.next);
+       first = to_ioat_desc(desc->tx_list.next);
        chain_tail = to_ioat_desc(ioat->used_desc.prev);
        /* make descriptor updates globally visible before chaining */
        wmb();
        chain_tail->hw->next = first->txd.phys;
-       list_splice_tail_init(&tx->tx_list, &ioat->used_desc);
+       list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
        dump_desc_dbg(ioat, chain_tail);
        dump_desc_dbg(ioat, first);
  
        if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
                mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
  
 +      ioat->active += desc->hw->tx_cnt;
        ioat->pending += desc->hw->tx_cnt;
        if (ioat->pending >= ioat_pending_level)
                __ioat1_dma_memcpy_issue_pending(ioat);
@@@ -298,6 -297,7 +298,7 @@@ ioat_dma_alloc_descriptor(struct ioat_d
  
        memset(desc, 0, sizeof(*desc));
  
+       INIT_LIST_HEAD(&desc_sw->tx_list);
        dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
        desc_sw->txd.tx_submit = ioat1_tx_submit;
        desc_sw->hw = desc;
@@@ -522,7 -522,7 +523,7 @@@ ioat1_dma_prep_memcpy(struct dma_chan *
  
        desc->txd.flags = flags;
        desc->len = total_len;
-       list_splice(&chain, &desc->txd.tx_list);
+       list_splice(&chain, &desc->tx_list);
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        hw->ctl_f.compl_write = 1;
        hw->tx_cnt = tx_cnt;
@@@ -539,6 -539,17 +540,6 @@@ static void ioat1_cleanup_tasklet(unsig
        writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
  }
  
 -static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
 -                     int direction, enum dma_ctrl_flags flags, bool dst)
 -{
 -      if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
 -          (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
 -              pci_unmap_single(pdev, addr, len, direction);
 -      else
 -              pci_unmap_page(pdev, addr, len, direction);
 -}
 -
 -
  void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
                    size_t len, struct ioat_dma_descriptor *hw)
  {
@@@ -612,7 -623,6 +613,7 @@@ static void __cleanup(struct ioat_dma_c
                        chan->completed_cookie = tx->cookie;
                        tx->cookie = 0;
                        ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
 +                      ioat->active -= desc->hw->tx_cnt;
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
                                tx->callback = NULL;
@@@ -799,7 -809,7 +800,7 @@@ static void __devinit ioat_dma_test_cal
   * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
   * @device: device to be tested
   */
 -static int __devinit ioat_dma_self_test(struct ioatdma_device *device)
 +int __devinit ioat_dma_self_test(struct ioatdma_device *device)
  {
        int i;
        u8 *src;
@@@ -1030,8 -1040,13 +1031,8 @@@ int __devinit ioat_probe(struct ioatdma
        dma_cap_set(DMA_MEMCPY, dma->cap_mask);
        dma->dev = &pdev->dev;
  
 -      dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
 -              " %d channels, device version 0x%02x, driver version %s\n",
 -              dma->chancnt, device->version, IOAT_DMA_VERSION);
 -
        if (!dma->chancnt) {
 -              dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
 -                      "zero channels detected\n");
 +              dev_err(dev, "zero channels detected\n");
                goto err_setup_interrupts;
        }
  
        if (err)
                goto err_setup_interrupts;
  
 -      err = ioat_dma_self_test(device);
 +      err = device->self_test(device);
        if (err)
                goto err_self_test;
  
@@@ -1082,113 -1097,6 +1083,113 @@@ static void ioat1_intr_quirk(struct ioa
        pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
  }
  
 +static ssize_t ring_size_show(struct dma_chan *c, char *page)
 +{
 +      struct ioat_dma_chan *ioat = to_ioat_chan(c);
 +
 +      return sprintf(page, "%d\n", ioat->desccount);
 +}
 +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
 +
 +static ssize_t ring_active_show(struct dma_chan *c, char *page)
 +{
 +      struct ioat_dma_chan *ioat = to_ioat_chan(c);
 +
 +      return sprintf(page, "%d\n", ioat->active);
 +}
 +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
 +
 +static ssize_t cap_show(struct dma_chan *c, char *page)
 +{
 +      struct dma_device *dma = c->device;
 +
 +      return sprintf(page, "copy%s%s%s%s%s%s\n",
 +                     dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
 +                     dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
 +                     dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
 +                     dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
 +                     dma_has_cap(DMA_MEMSET, dma->cap_mask)  ? " fill" : "",
 +                     dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
 +
 +}
 +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
 +
 +static ssize_t version_show(struct dma_chan *c, char *page)
 +{
 +      struct dma_device *dma = c->device;
 +      struct ioatdma_device *device = to_ioatdma_device(dma);
 +
 +      return sprintf(page, "%d.%d\n",
 +                     device->version >> 4, device->version & 0xf);
 +}
 +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
 +
 +static struct attribute *ioat1_attrs[] = {
 +      &ring_size_attr.attr,
 +      &ring_active_attr.attr,
 +      &ioat_cap_attr.attr,
 +      &ioat_version_attr.attr,
 +      NULL,
 +};
 +
 +static ssize_t
 +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 +{
 +      struct ioat_sysfs_entry *entry;
 +      struct ioat_chan_common *chan;
 +
 +      entry = container_of(attr, struct ioat_sysfs_entry, attr);
 +      chan = container_of(kobj, struct ioat_chan_common, kobj);
 +
 +      if (!entry->show)
 +              return -EIO;
 +      return entry->show(&chan->common, page);
 +}
 +
 +struct sysfs_ops ioat_sysfs_ops = {
 +      .show   = ioat_attr_show,
 +};
 +
 +static struct kobj_type ioat1_ktype = {
 +      .sysfs_ops = &ioat_sysfs_ops,
 +      .default_attrs = ioat1_attrs,
 +};
 +
 +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
 +{
 +      struct dma_device *dma = &device->common;
 +      struct dma_chan *c;
 +
 +      list_for_each_entry(c, &dma->channels, device_node) {
 +              struct ioat_chan_common *chan = to_chan_common(c);
 +              struct kobject *parent = &c->dev->device.kobj;
 +              int err;
 +
 +              err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
 +              if (err) {
 +                      dev_warn(to_dev(chan),
 +                               "sysfs init error (%d), continuing...\n", err);
 +                      kobject_put(&chan->kobj);
 +                      set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
 +              }
 +      }
 +}
 +
 +void ioat_kobject_del(struct ioatdma_device *device)
 +{
 +      struct dma_device *dma = &device->common;
 +      struct dma_chan *c;
 +
 +      list_for_each_entry(c, &dma->channels, device_node) {
 +              struct ioat_chan_common *chan = to_chan_common(c);
 +
 +              if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
 +                      kobject_del(&chan->kobj);
 +                      kobject_put(&chan->kobj);
 +              }
 +      }
 +}
 +
  int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
  {
        struct pci_dev *pdev = device->pdev;
  
        device->intr_quirk = ioat1_intr_quirk;
        device->enumerate_channels = ioat1_enumerate_channels;
 +      device->self_test = ioat_dma_self_test;
        dma = &device->common;
        dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
        dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
        err = ioat_register(device);
        if (err)
                return err;
 +      ioat_kobject_add(device, &ioat1_ktype);
 +
        if (dca)
                device->dca = ioat_dca_init(pdev, device->reg_base);
  
@@@ -1226,8 -1131,6 +1227,8 @@@ void __devexit ioat_dma_remove(struct i
  
        ioat_disable_interrupts(device);
  
 +      ioat_kobject_del(device);
 +
        dma_async_device_unregister(dma);
  
        pci_pool_destroy(device->dma_pool);
diff --combined drivers/dma/ioat/dma.h
index 0e37e426c729c86157424ca28b544ae1f250aa93,8966fa5453a797770a5f2d0f89165055efb11043..6a675a2a2d1cffbe05938d3981f4945a21df1997
   * @dca: direct cache access context
   * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
   * @enumerate_channels: hw version specific channel enumeration
 + * @cleanup_tasklet: select between the v2 and v3 cleanup routines
 + * @timer_fn: select between the v2 and v3 timer watchdog routines
 + * @self_test: hardware version specific self test for each supported op type
 + *
 + * Note: the v3 cleanup routine supports raid operations
   */
 -
  struct ioatdma_device {
        struct pci_dev *pdev;
        void __iomem *reg_base;
@@@ -78,9 -74,6 +78,9 @@@
        struct dca_provider *dca;
        void (*intr_quirk)(struct ioatdma_device *device);
        int (*enumerate_channels)(struct ioatdma_device *device);
 +      void (*cleanup_tasklet)(unsigned long data);
 +      void (*timer_fn)(unsigned long data);
 +      int (*self_test)(struct ioatdma_device *device);
  };
  
  struct ioat_chan_common {
@@@ -93,7 -86,6 +93,7 @@@
        #define IOAT_COMPLETION_PENDING 0
        #define IOAT_COMPLETION_ACK 1
        #define IOAT_RESET_PENDING 2
 +      #define IOAT_KOBJ_INIT_FAIL 3
        struct timer_list timer;
        #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
        #define IDLE_TIMEOUT msecs_to_jiffies(2000)
        dma_addr_t completion_dma;
        u64 *completion;
        struct tasklet_struct cleanup_task;
 +      struct kobject kobj;
  };
  
 +struct ioat_sysfs_entry {
 +      struct attribute attr;
 +      ssize_t (*show)(struct dma_chan *, char *);
 +};
  
  /**
   * struct ioat_dma_chan - internal representation of a DMA channel
@@@ -124,7 -111,6 +124,7 @@@ struct ioat_dma_chan 
  
        int pending;
        u16 desccount;
 +      u16 active;
  };
  
  static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
@@@ -169,9 -155,9 +169,9 @@@ ioat_is_complete(struct dma_chan *c, dm
  
  /**
   * struct ioat_desc_sw - wrapper around hardware descriptor
 - * @hw: hardware DMA descriptor
 + * @hw: hardware DMA descriptor (for memcpy)
   * @node: this descriptor will either be on the free list,
-  *     or attached to a transaction list (async_tx.tx_list)
+  *     or attached to a transaction list (tx_list)
   * @txd: the generic software descriptor for all engines
   * @id: identifier for debug
   */
@@@ -179,6 -165,7 +179,7 @@@ struct ioat_desc_sw 
        struct ioat_dma_descriptor *hw;
        struct list_head node;
        size_t len;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor txd;
        #ifdef DEBUG
        int id;
@@@ -301,20 -288,9 +302,20 @@@ static inline bool is_ioat_bug(unsigne
                         IOAT_CHANERR_LENGTH_ERR));
  }
  
 +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
 +                            int direction, enum dma_ctrl_flags flags, bool dst)
 +{
 +      if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
 +          (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
 +              pci_unmap_single(pdev, addr, len, direction);
 +      else
 +              pci_unmap_page(pdev, addr, len, direction);
 +}
 +
  int __devinit ioat_probe(struct ioatdma_device *device);
  int __devinit ioat_register(struct ioatdma_device *device);
  int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
 +int __devinit ioat_dma_self_test(struct ioatdma_device *device);
  void __devexit ioat_dma_remove(struct ioatdma_device *device);
  struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
                                              void __iomem *iobase);
@@@ -328,9 -304,4 +329,9 @@@ void ioat_dma_unmap(struct ioat_chan_co
                    size_t len, struct ioat_dma_descriptor *hw);
  bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
                           unsigned long *phys_complete);
 +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
 +void ioat_kobject_del(struct ioatdma_device *device);
 +extern struct sysfs_ops ioat_sysfs_ops;
 +extern struct ioat_sysfs_entry ioat_version_attr;
 +extern struct ioat_sysfs_entry ioat_cap_attr;
  #endif /* IOATDMA_H */
diff --combined drivers/dma/ioat/dma_v2.c
index 7bbbd83d12e68aa7bca1277e5819f65045b01a25,fa3d6db6624cf31597e9f4099b4560edfc541de9..5d6ac49e0d3258c4a92f48319271329ef2cd1137
@@@ -39,7 -39,7 +39,7 @@@
  #include "registers.h"
  #include "hw.h"
  
 -static int ioat_ring_alloc_order = 8;
 +int ioat_ring_alloc_order = 8;
  module_param(ioat_ring_alloc_order, int, 0644);
  MODULE_PARM_DESC(ioat_ring_alloc_order,
                 "ioat2+: allocate 2^n descriptors per channel (default: n=8)");
@@@ -48,7 -48,7 +48,7 @@@ module_param(ioat_ring_max_alloc_order
  MODULE_PARM_DESC(ioat_ring_max_alloc_order,
                 "ioat2+: upper limit for dynamic ring resizing (default: n=16)");
  
 -static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
 +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
  {
        void * __iomem reg_base = ioat->base.reg_base;
  
@@@ -63,7 -63,7 +63,7 @@@
                __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
  }
  
 -static void ioat2_issue_pending(struct dma_chan *chan)
 +void ioat2_issue_pending(struct dma_chan *chan)
  {
        struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
  
@@@ -206,7 -206,7 +206,7 @@@ static void ioat2_cleanup(struct ioat2_
        spin_unlock_bh(&chan->cleanup_lock);
  }
  
 -static void ioat2_cleanup_tasklet(unsigned long data)
 +void ioat2_cleanup_tasklet(unsigned long data)
  {
        struct ioat2_dma_chan *ioat = (void *) data;
  
        writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
  }
  
 -static void __restart_chan(struct ioat2_dma_chan *ioat)
 +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
  {
        struct ioat_chan_common *chan = &ioat->base;
  
@@@ -255,10 -255,12 +255,10 @@@ static void ioat2_restart_channel(struc
        if (ioat_cleanup_preamble(chan, &phys_complete))
                __cleanup(ioat, phys_complete);
  
 -      __restart_chan(ioat);
 +      __ioat2_restart_chan(ioat);
  }
  
 -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
 -
 -static void ioat2_timer_event(unsigned long data)
 +void ioat2_timer_event(unsigned long data)
  {
        struct ioat2_dma_chan *ioat = (void *) data;
        struct ioat_chan_common *chan = &ioat->base;
   * ioat2_enumerate_channels - find and initialize the device's channels
   * @device: the device to be enumerated
   */
 -static int ioat2_enumerate_channels(struct ioatdma_device *device)
 +int ioat2_enumerate_channels(struct ioatdma_device *device)
  {
        struct ioat2_dma_chan *ioat;
        struct device *dev = &device->pdev->dev;
                        break;
  
                ioat_init_channel(device, &ioat->base, i,
 -                                ioat2_timer_event,
 -                                ioat2_cleanup_tasklet,
 +                                device->timer_fn,
 +                                device->cleanup_tasklet,
                                  (unsigned long) ioat);
                ioat->xfercap_log = xfercap_log;
                spin_lock_init(&ioat->ring_lock);
@@@ -397,11 -399,12 +397,12 @@@ static struct ioat_ring_ent *ioat2_allo
                return NULL;
        memset(hw, 0, sizeof(*hw));
  
-       desc = kzalloc(sizeof(*desc), flags);
+       desc = kmem_cache_alloc(ioat2_cache, flags);
        if (!desc) {
                pci_pool_free(dma->dma_pool, hw, phys);
                return NULL;
        }
+       memset(desc, 0, sizeof(*desc));
  
        dma_async_tx_descriptor_init(&desc->txd, chan);
        desc->txd.tx_submit = ioat2_tx_submit_unlock;
@@@ -416,7 -419,7 +417,7 @@@ static void ioat2_free_ring_ent(struct 
  
        dma = to_ioatdma_device(chan->device);
        pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
-       kfree(desc);
+       kmem_cache_free(ioat2_cache, desc);
  }
  
  static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
  /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
   * @chan: channel to be initialized
   */
 -static int ioat2_alloc_chan_resources(struct dma_chan *c)
 +int ioat2_alloc_chan_resources(struct dma_chan *c)
  {
        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
        struct ioat_chan_common *chan = &ioat->base;
        return 1 << ioat->alloc_order;
  }
  
 -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
 +bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
  {
        /* reshape differs from normal ring allocation in that we want
         * to allocate a new software ring while only
   * @ioat: ioat2,3 channel (ring) to operate on
   * @num_descs: allocation length
   */
 -static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
 +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
  {
        struct ioat_chan_common *chan = &ioat->base;
  
                spin_lock_bh(&chan->cleanup_lock);
                if (jiffies > chan->timer.expires &&
                    timer_pending(&chan->timer)) {
 +                      struct ioatdma_device *device = chan->device;
 +
                        mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
                        spin_unlock_bh(&chan->cleanup_lock);
 -                      ioat2_timer_event((unsigned long) ioat);
 +                      device->timer_fn((unsigned long) ioat);
                } else
                        spin_unlock_bh(&chan->cleanup_lock);
                return -ENOMEM;
        return 0;  /* with ioat->ring_lock held */
  }
  
 -static struct dma_async_tx_descriptor *
 +struct dma_async_tx_descriptor *
  ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
                           dma_addr_t dma_src, size_t len, unsigned long flags)
  {
        desc->txd.flags = flags;
        desc->len = total_len;
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 +      hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
        hw->ctl_f.compl_write = 1;
        dump_desc_dbg(ioat, desc);
        /* we leave the channel locked to ensure in order submission */
   * ioat2_free_chan_resources - release all the descriptors
   * @chan: the channel to be cleaned
   */
 -static void ioat2_free_chan_resources(struct dma_chan *c)
 +void ioat2_free_chan_resources(struct dma_chan *c)
  {
        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
        struct ioat_chan_common *chan = &ioat->base;
 -      struct ioatdma_device *ioatdma_device = chan->device;
 +      struct ioatdma_device *device = chan->device;
        struct ioat_ring_ent *desc;
        const u16 total_descs = 1 << ioat->alloc_order;
        int descs;
  
        tasklet_disable(&chan->cleanup_task);
        del_timer_sync(&chan->timer);
 -      ioat2_cleanup(ioat);
 +      device->cleanup_tasklet((unsigned long) ioat);
  
        /* Delay 100ms after reset to allow internal DMA logic to quiesce
         * before removing DMA descriptor resources.
        kfree(ioat->ring);
        ioat->ring = NULL;
        ioat->alloc_order = 0;
 -      pci_pool_free(ioatdma_device->completion_pool,
 -                    chan->completion,
 +      pci_pool_free(device->completion_pool, chan->completion,
                      chan->completion_dma);
        spin_unlock_bh(&ioat->ring_lock);
  
        ioat->dmacount = 0;
  }
  
 -static enum dma_status
 +enum dma_status
  ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
                     dma_cookie_t *done, dma_cookie_t *used)
  {
        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 +      struct ioatdma_device *device = ioat->base.device;
  
        if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
                return DMA_SUCCESS;
  
 -      ioat2_cleanup(ioat);
 +      device->cleanup_tasklet((unsigned long) ioat);
  
        return ioat_is_complete(c, cookie, done, used);
  }
  
 -int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
 +static ssize_t ring_size_show(struct dma_chan *c, char *page)
  {
 -      struct pci_dev *pdev = device->pdev;
 -      struct dma_device *dma;
 -      struct dma_chan *c;
 -      struct ioat_chan_common *chan;
 -      int err;
 +      struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
  
 -      device->enumerate_channels = ioat2_enumerate_channels;
 -      dma = &device->common;
 -      dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
 -      dma->device_issue_pending = ioat2_issue_pending;
 -      dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
 -      dma->device_free_chan_resources = ioat2_free_chan_resources;
 -      dma->device_is_tx_complete = ioat2_is_complete;
 +      return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
 +}
 +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
  
 -      err = ioat_probe(device);
 -      if (err)
 -              return err;
 -      ioat_set_tcp_copy_break(2048);
 +static ssize_t ring_active_show(struct dma_chan *c, char *page)
 +{
 +      struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
  
 -      list_for_each_entry(c, &dma->channels, device_node) {
 -              chan = to_chan_common(c);
 -              writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
 -                     chan->reg_base + IOAT_DCACTRL_OFFSET);
 -      }
 +      /* ...taken outside the lock, no need to be precise */
 +      return sprintf(page, "%d\n", ioat2_ring_active(ioat));
 +}
 +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
  
 -      err = ioat_register(device);
 -      if (err)
 -              return err;
 -      if (dca)
 -              device->dca = ioat2_dca_init(pdev, device->reg_base);
 +static struct attribute *ioat2_attrs[] = {
 +      &ring_size_attr.attr,
 +      &ring_active_attr.attr,
 +      &ioat_cap_attr.attr,
 +      &ioat_version_attr.attr,
 +      NULL,
 +};
  
 -      return err;
 -}
 +struct kobj_type ioat2_ktype = {
 +      .sysfs_ops = &ioat_sysfs_ops,
 +      .default_attrs = ioat2_attrs,
 +};
  
 -int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
  {
        struct pci_dev *pdev = device->pdev;
        struct dma_device *dma;
        struct dma_chan *c;
        struct ioat_chan_common *chan;
        int err;
 -      u16 dev_id;
  
        device->enumerate_channels = ioat2_enumerate_channels;
 +      device->cleanup_tasklet = ioat2_cleanup_tasklet;
 +      device->timer_fn = ioat2_timer_event;
 +      device->self_test = ioat_dma_self_test;
        dma = &device->common;
        dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
        dma->device_issue_pending = ioat2_issue_pending;
        dma->device_free_chan_resources = ioat2_free_chan_resources;
        dma->device_is_tx_complete = ioat2_is_complete;
  
 -      /* -= IOAT ver.3 workarounds =- */
 -      /* Write CHANERRMSK_INT with 3E07h to mask out the errors
 -       * that can cause stability issues for IOAT ver.3
 -       */
 -      pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
 -
 -      /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
 -       * (workaround for spurious config parity error after restart)
 -       */
 -      pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
 -      if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
 -              pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
 -
        err = ioat_probe(device);
        if (err)
                return err;
 -      ioat_set_tcp_copy_break(262144);
 +      ioat_set_tcp_copy_break(2048);
  
        list_for_each_entry(c, &dma->channels, device_node) {
                chan = to_chan_common(c);
 -              writel(IOAT_DMA_DCA_ANY_CPU,
 +              writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
                       chan->reg_base + IOAT_DCACTRL_OFFSET);
        }
  
        err = ioat_register(device);
        if (err)
                return err;
 +
 +      ioat_kobject_add(device, &ioat2_ktype);
 +
        if (dca)
 -              device->dca = ioat3_dca_init(pdev, device->reg_base);
 +              device->dca = ioat2_dca_init(pdev, device->reg_base);
  
        return err;
  }
index 246e646b1904d0a9f402d5ff6cf28a0a35a3b651,ac00adc81974adaf2545259ef1f5ddd7cc7ea7d2..1d849ef74d5f7ff3a14bc7350ce58ce7d42be9c8
@@@ -27,7 -27,6 +27,7 @@@
  
  
  extern int ioat_pending_level;
 +extern int ioat_ring_alloc_order;
  
  /*
   * workaround for IOAT ver.3.0 null descriptor issue
@@@ -115,36 -114,10 +115,36 @@@ static inline u16 ioat2_xferlen_to_desc
        return num_descs;
  }
  
 +/**
 + * struct ioat_ring_ent - wrapper around hardware descriptor
 + * @hw: hardware DMA descriptor (for memcpy)
 + * @fill: hardware fill descriptor
 + * @xor: hardware xor descriptor
 + * @xor_ex: hardware xor extension descriptor
 + * @pq: hardware pq descriptor
 + * @pq_ex: hardware pq extension descriptor
 + * @pqu: hardware pq update descriptor
 + * @raw: hardware raw (un-typed) descriptor
 + * @txd: the generic software descriptor for all engines
 + * @len: total transaction length for unmap
 + * @result: asynchronous result of validate operations
 + * @id: identifier for debug
 + */
 +
  struct ioat_ring_ent {
 -      struct ioat_dma_descriptor *hw;
 +      union {
 +              struct ioat_dma_descriptor *hw;
 +              struct ioat_fill_descriptor *fill;
 +              struct ioat_xor_descriptor *xor;
 +              struct ioat_xor_ext_descriptor *xor_ex;
 +              struct ioat_pq_descriptor *pq;
 +              struct ioat_pq_ext_descriptor *pq_ex;
 +              struct ioat_pq_update_descriptor *pqu;
 +              struct ioat_raw_descriptor *raw;
 +      };
-       struct dma_async_tx_descriptor txd;
        size_t len;
+       struct dma_async_tx_descriptor txd;
 +      enum sum_check_flags *result;
        #ifdef DEBUG
        int id;
        #endif
@@@ -170,20 -143,5 +170,21 @@@ int __devinit ioat2_dma_probe(struct io
  int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
  struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
  struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
 +int ioat2_enumerate_channels(struct ioatdma_device *device);
 +struct dma_async_tx_descriptor *
 +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
 +                         dma_addr_t dma_src, size_t len, unsigned long flags);
 +void ioat2_issue_pending(struct dma_chan *chan);
 +int ioat2_alloc_chan_resources(struct dma_chan *c);
 +void ioat2_free_chan_resources(struct dma_chan *c);
 +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
 +                                dma_cookie_t *done, dma_cookie_t *used);
 +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
 +bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
 +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
 +void ioat2_cleanup_tasklet(unsigned long data);
 +void ioat2_timer_event(unsigned long data);
 +extern struct kobj_type ioat2_ktype;
+ extern struct kmem_cache *ioat2_cache;
  #endif /* IOATDMA_V2_H */
diff --combined drivers/dma/ioat/pci.c
index b77d3a2864adbad8cbd31865883003c12f5cd447,61086c6bbf42a5baa8acae3945e8e8a6d8e038c7..c788fa2664708e99caaabe932dead8e3b61f40de
  #include "hw.h"
  
  MODULE_VERSION(IOAT_DMA_VERSION);
 -MODULE_LICENSE("GPL");
 +MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Intel Corporation");
  
  static struct pci_device_id ioat_pci_tbl[] = {
        /* I/OAT v1 platforms */
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
 +      { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
  
        /* I/OAT v2 platforms */
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
  
        /* I/OAT v3 platforms */
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
 -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
 +
 +      /* I/OAT v3.2 platforms */
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
 +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
 +
        { 0, }
  };
 +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
  
  static int __devinit ioat_pci_probe(struct pci_dev *pdev,
                                    const struct pci_device_id *id);
@@@ -83,6 -69,8 +83,8 @@@ static int ioat_dca_enabled = 1
  module_param(ioat_dca_enabled, int, 0644);
  MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
  
+ struct kmem_cache *ioat2_cache;
  #define DRV_NAME "ioatdma"
  
  static struct pci_driver ioat_pci_driver = {
@@@ -182,15 -170,24 +184,27 @@@ static void __devexit ioat_remove(struc
  
  static int __init ioat_init_module(void)
  {
-       return pci_register_driver(&ioat_pci_driver);
+       int err;
 +      pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
 +              DRV_NAME, IOAT_DMA_VERSION);
 +
+       ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+                                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ioat2_cache)
+               return -ENOMEM;
+       err = pci_register_driver(&ioat_pci_driver);
+       if (err)
+               kmem_cache_destroy(ioat2_cache);
+       return err;
  }
  module_init(ioat_init_module);
  
  static void __exit ioat_exit_module(void)
  {
        pci_unregister_driver(&ioat_pci_driver);
+       kmem_cache_destroy(ioat2_cache);
  }
  module_exit(ioat_exit_module);
diff --combined drivers/dma/iop-adma.c
index 518f557ef857379298f2c53b31cce521342d5af8,9f6c16f8e2be722ae6bca256d6032169f73591a3..645ca8d54ec43350059bd8d7c8802f017b4a4abd
@@@ -31,7 -31,6 +31,7 @@@
  #include <linux/platform_device.h>
  #include <linux/memory.h>
  #include <linux/ioport.h>
 +#include <linux/raid/pq.h>
  
  #include <mach/adma.h>
  
@@@ -58,110 -57,65 +58,110 @@@ static void iop_adma_free_slots(struct 
        }
  }
  
 +static void
 +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
 +{
 +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
 +      struct iop_adma_desc_slot *unmap = desc->group_head;
 +      struct device *dev = &iop_chan->device->pdev->dev;
 +      u32 len = unmap->unmap_len;
 +      enum dma_ctrl_flags flags = tx->flags;
 +      u32 src_cnt;
 +      dma_addr_t addr;
 +      dma_addr_t dest;
 +
 +      src_cnt = unmap->unmap_src_cnt;
 +      dest = iop_desc_get_dest_addr(unmap, iop_chan);
 +      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 +              enum dma_data_direction dir;
 +
 +              if (src_cnt > 1) /* is xor? */
 +                      dir = DMA_BIDIRECTIONAL;
 +              else
 +                      dir = DMA_FROM_DEVICE;
 +
 +              dma_unmap_page(dev, dest, len, dir);
 +      }
 +
 +      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 +              while (src_cnt--) {
 +                      addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
 +                      if (addr == dest)
 +                              continue;
 +                      dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
 +              }
 +      }
 +      desc->group_head = NULL;
 +}
 +
 +static void
 +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
 +{
 +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
 +      struct iop_adma_desc_slot *unmap = desc->group_head;
 +      struct device *dev = &iop_chan->device->pdev->dev;
 +      u32 len = unmap->unmap_len;
 +      enum dma_ctrl_flags flags = tx->flags;
 +      u32 src_cnt = unmap->unmap_src_cnt;
 +      dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
 +      dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
 +      int i;
 +
 +      if (tx->flags & DMA_PREP_CONTINUE)
 +              src_cnt -= 3;
 +
 +      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
 +              dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
 +              dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
 +      }
 +
 +      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 +              dma_addr_t addr;
 +
 +              for (i = 0; i < src_cnt; i++) {
 +                      addr = iop_desc_get_src_addr(unmap, iop_chan, i);
 +                      dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
 +              }
 +              if (desc->pq_check_result) {
 +                      dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
 +                      dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
 +              }
 +      }
 +
 +      desc->group_head = NULL;
 +}
 +
 +
  static dma_cookie_t
  iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
  {
 -      BUG_ON(desc->async_tx.cookie < 0);
 -      if (desc->async_tx.cookie > 0) {
 -              cookie = desc->async_tx.cookie;
 -              desc->async_tx.cookie = 0;
 +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
 +
 +      BUG_ON(tx->cookie < 0);
 +      if (tx->cookie > 0) {
 +              cookie = tx->cookie;
 +              tx->cookie = 0;
  
                /* call the callback (must not sleep or submit new
                 * operations to this channel)
                 */
 -              if (desc->async_tx.callback)
 -                      desc->async_tx.callback(
 -                              desc->async_tx.callback_param);
 +              if (tx->callback)
 +                      tx->callback(tx->callback_param);
  
                /* unmap dma addresses
                 * (unmap_single vs unmap_page?)
                 */
                if (desc->group_head && desc->unmap_len) {
 -                      struct iop_adma_desc_slot *unmap = desc->group_head;
 -                      struct device *dev =
 -                              &iop_chan->device->pdev->dev;
 -                      u32 len = unmap->unmap_len;
 -                      enum dma_ctrl_flags flags = desc->async_tx.flags;
 -                      u32 src_cnt;
 -                      dma_addr_t addr;
 -                      dma_addr_t dest;
 -
 -                      src_cnt = unmap->unmap_src_cnt;
 -                      dest = iop_desc_get_dest_addr(unmap, iop_chan);
 -                      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 -                              enum dma_data_direction dir;
 -
 -                              if (src_cnt > 1) /* is xor? */
 -                                      dir = DMA_BIDIRECTIONAL;
 -                              else
 -                                      dir = DMA_FROM_DEVICE;
 -
 -                              dma_unmap_page(dev, dest, len, dir);
 -                      }
 -
 -                      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 -                              while (src_cnt--) {
 -                                      addr = iop_desc_get_src_addr(unmap,
 -                                                                   iop_chan,
 -                                                                   src_cnt);
 -                                      if (addr == dest)
 -                                              continue;
 -                                      dma_unmap_page(dev, addr, len,
 -                                                     DMA_TO_DEVICE);
 -                              }
 -                      }
 -                      desc->group_head = NULL;
 +                      if (iop_desc_is_pq(desc))
 +                              iop_desc_unmap_pq(iop_chan, desc);
 +                      else
 +                              iop_desc_unmap(iop_chan, desc);
                }
        }
  
        /* run dependent operations */
 -      dma_run_dependencies(&desc->async_tx);
 +      dma_run_dependencies(tx);
  
        return cookie;
  }
@@@ -333,12 -287,7 +333,12 @@@ static void iop_adma_tasklet(unsigned l
  {
        struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
  
 -      spin_lock(&iop_chan->lock);
 +      /* lockdep will flag dependency submissions as potentially
 +       * recursive locking, this is not the case as a dependency
 +       * submission will never recurse a channel's submit routine.
 +       * There are checks in async_tx.c to prevent this.
 +       */
 +      spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
        __iop_adma_slot_cleanup(iop_chan);
        spin_unlock(&iop_chan->lock);
  }
@@@ -421,7 -370,7 +421,7 @@@ retry
                        }
                        alloc_tail->group_head = alloc_start;
                        alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                        iop_chan->last_used = last_used;
                        iop_desc_clear_next_desc(alloc_start);
                        iop_desc_clear_next_desc(alloc_tail);
@@@ -480,7 -429,7 +480,7 @@@ iop_adma_tx_submit(struct dma_async_tx_
  
        old_chain_tail = list_entry(iop_chan->chain.prev,
                struct iop_adma_desc_slot, chain_node);
-       list_splice_init(&sw_desc->async_tx.tx_list,
+       list_splice_init(&sw_desc->tx_list,
                         &old_chain_tail->chain_node);
  
        /* fix up the hardware chain */
@@@ -547,6 -496,7 +547,7 @@@ static int iop_adma_alloc_chan_resource
  
                dma_async_tx_descriptor_init(&slot->async_tx, chan);
                slot->async_tx.tx_submit = iop_adma_tx_submit;
+               INIT_LIST_HEAD(&slot->tx_list);
                INIT_LIST_HEAD(&slot->chain_node);
                INIT_LIST_HEAD(&slot->slot_node);
                hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@@ -711,9 -661,9 +712,9 @@@ iop_adma_prep_dma_xor(struct dma_chan *
  }
  
  static struct dma_async_tx_descriptor *
 -iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
 -                         unsigned int src_cnt, size_t len, u32 *result,
 -                         unsigned long flags)
 +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
 +                        unsigned int src_cnt, size_t len, u32 *result,
 +                        unsigned long flags)
  {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
        struct iop_adma_desc_slot *sw_desc, *grp_start;
        return sw_desc ? &sw_desc->async_tx : NULL;
  }
  
 +static struct dma_async_tx_descriptor *
 +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 +                   unsigned int src_cnt, const unsigned char *scf, size_t len,
 +                   unsigned long flags)
 +{
 +      struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 +      struct iop_adma_desc_slot *sw_desc, *g;
 +      int slot_cnt, slots_per_op;
 +      int continue_srcs;
 +
 +      if (unlikely(!len))
 +              return NULL;
 +      BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
 +
 +      dev_dbg(iop_chan->device->common.dev,
 +              "%s src_cnt: %d len: %u flags: %lx\n",
 +              __func__, src_cnt, len, flags);
 +
 +      if (dmaf_p_disabled_continue(flags))
 +              continue_srcs = 1+src_cnt;
 +      else if (dmaf_continue(flags))
 +              continue_srcs = 3+src_cnt;
 +      else
 +              continue_srcs = 0+src_cnt;
 +
 +      spin_lock_bh(&iop_chan->lock);
 +      slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
 +      sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 +      if (sw_desc) {
 +              int i;
 +
 +              g = sw_desc->group_head;
 +              iop_desc_set_byte_count(g, iop_chan, len);
 +
 +              /* even if P is disabled its destination address (bits
 +               * [3:0]) must match Q.  It is ok if P points to an
 +               * invalid address, it won't be written.
 +               */
 +              if (flags & DMA_PREP_PQ_DISABLE_P)
 +                      dst[0] = dst[1] & 0x7;
 +
 +              iop_desc_set_pq_addr(g, dst);
 +              sw_desc->unmap_src_cnt = src_cnt;
 +              sw_desc->unmap_len = len;
 +              sw_desc->async_tx.flags = flags;
 +              for (i = 0; i < src_cnt; i++)
 +                      iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
 +
 +              /* if we are continuing a previous operation factor in
 +               * the old p and q values, see the comment for dma_maxpq
 +               * in include/linux/dmaengine.h
 +               */
 +              if (dmaf_p_disabled_continue(flags))
 +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
 +              else if (dmaf_continue(flags)) {
 +                      iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
 +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
 +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
 +              }
 +              iop_desc_init_pq(g, i, flags);
 +      }
 +      spin_unlock_bh(&iop_chan->lock);
 +
 +      return sw_desc ? &sw_desc->async_tx : NULL;
 +}
 +
 +static struct dma_async_tx_descriptor *
 +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 +                       unsigned int src_cnt, const unsigned char *scf,
 +                       size_t len, enum sum_check_flags *pqres,
 +                       unsigned long flags)
 +{
 +      struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 +      struct iop_adma_desc_slot *sw_desc, *g;
 +      int slot_cnt, slots_per_op;
 +
 +      if (unlikely(!len))
 +              return NULL;
 +      BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
 +
 +      dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
 +              __func__, src_cnt, len);
 +
 +      spin_lock_bh(&iop_chan->lock);
 +      slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
 +      sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 +      if (sw_desc) {
 +              /* for validate operations p and q are tagged onto the
 +               * end of the source list
 +               */
 +              int pq_idx = src_cnt;
 +
 +              g = sw_desc->group_head;
 +              iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
 +              iop_desc_set_pq_zero_sum_byte_count(g, len);
 +              g->pq_check_result = pqres;
 +              pr_debug("\t%s: g->pq_check_result: %p\n",
 +                      __func__, g->pq_check_result);
 +              sw_desc->unmap_src_cnt = src_cnt+2;
 +              sw_desc->unmap_len = len;
 +              sw_desc->async_tx.flags = flags;
 +              while (src_cnt--)
 +                      iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
 +                                                        src[src_cnt],
 +                                                        scf[src_cnt]);
 +              iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
 +      }
 +      spin_unlock_bh(&iop_chan->lock);
 +
 +      return sw_desc ? &sw_desc->async_tx : NULL;
 +}
 +
  static void iop_adma_free_chan_resources(struct dma_chan *chan)
  {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@@ -1069,7 -907,7 +1070,7 @@@ out
  
  #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
  static int __devinit
 -iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 +iop_adma_xor_val_self_test(struct iop_adma_device *device)
  {
        int i, src_idx;
        struct page *dest;
                PAGE_SIZE, DMA_TO_DEVICE);
  
        /* skip zero sum if the capability is not present */
 -      if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
 +      if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
                goto free_resources;
  
        /* zero sum the sources with the destintation page */
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
 -      tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
 -                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
 -                                      &zero_sum_result,
 -                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 +      tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
 +                                     IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
 +                                     &zero_sum_result,
 +                                     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
  
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
 -      tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
 -                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
 -                                      &zero_sum_result,
 -                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 +      tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
 +                                     IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
 +                                     &zero_sum_result,
 +                                     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
  
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
        return err;
  }
  
 +#ifdef CONFIG_MD_RAID6_PQ
 +static int __devinit
 +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
 +{
 +      /* combined sources, software pq results, and extra hw pq results */
 +      struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
 +      /* ptr to the extra hw pq buffers defined above */
 +      struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
 +      /* address conversion buffers (dma_map / page_address) */
 +      void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
 +      dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
 +      dma_addr_t pq_dest[2];
 +
 +      int i;
 +      struct dma_async_tx_descriptor *tx;
 +      struct dma_chan *dma_chan;
 +      dma_cookie_t cookie;
 +      u32 zero_sum_result;
 +      int err = 0;
 +      struct device *dev;
 +
 +      dev_dbg(device->common.dev, "%s\n", __func__);
 +
 +      for (i = 0; i < ARRAY_SIZE(pq); i++) {
 +              pq[i] = alloc_page(GFP_KERNEL);
 +              if (!pq[i]) {
 +                      while (i--)
 +                              __free_page(pq[i]);
 +                      return -ENOMEM;
 +              }
 +      }
 +
 +      /* Fill in src buffers */
 +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
 +              pq_sw[i] = page_address(pq[i]);
 +              memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
 +      }
 +      pq_sw[i] = page_address(pq[i]);
 +      pq_sw[i+1] = page_address(pq[i+1]);
 +
 +      dma_chan = container_of(device->common.channels.next,
 +                              struct dma_chan,
 +                              device_node);
 +      if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
 +              err = -ENODEV;
 +              goto out;
 +      }
 +
 +      dev = dma_chan->device->dev;
 +
 +      /* initialize the dests */
 +      memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
 +      memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
 +
 +      /* test pq */
 +      pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
 +      pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
 +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
 +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
 +                                       DMA_TO_DEVICE);
 +
 +      tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
 +                                IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
 +                                PAGE_SIZE,
 +                                DMA_PREP_INTERRUPT |
 +                                DMA_CTRL_ACK);
 +
 +      cookie = iop_adma_tx_submit(tx);
 +      iop_adma_issue_pending(dma_chan);
 +      msleep(8);
 +
 +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
 +              DMA_SUCCESS) {
 +              dev_err(dev, "Self-test pq timed out, disabling\n");
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +      raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
 +
 +      if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
 +                 page_address(pq_hw[0]), PAGE_SIZE) != 0) {
 +              dev_err(dev, "Self-test p failed compare, disabling\n");
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +      if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
 +                 page_address(pq_hw[1]), PAGE_SIZE) != 0) {
 +              dev_err(dev, "Self-test q failed compare, disabling\n");
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +      /* test correct zero sum using the software generated pq values */
 +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
 +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
 +                                       DMA_TO_DEVICE);
 +
 +      zero_sum_result = ~0;
 +      tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
 +                                    pq_src, IOP_ADMA_NUM_SRC_TEST,
 +                                    raid6_gfexp, PAGE_SIZE, &zero_sum_result,
 +                                    DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
 +
 +      cookie = iop_adma_tx_submit(tx);
 +      iop_adma_issue_pending(dma_chan);
 +      msleep(8);
 +
 +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
 +              DMA_SUCCESS) {
 +              dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +      if (zero_sum_result != 0) {
 +              dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
 +                      zero_sum_result);
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +      /* test incorrect zero sum */
 +      i = IOP_ADMA_NUM_SRC_TEST;
 +      memset(pq_sw[i] + 100, 0, 100);
 +      memset(pq_sw[i+1] + 200, 0, 200);
 +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
 +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
 +                                       DMA_TO_DEVICE);
 +
 +      zero_sum_result = 0;
 +      tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
 +                                    pq_src, IOP_ADMA_NUM_SRC_TEST,
 +                                    raid6_gfexp, PAGE_SIZE, &zero_sum_result,
 +                                    DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
 +
 +      cookie = iop_adma_tx_submit(tx);
 +      iop_adma_issue_pending(dma_chan);
 +      msleep(8);
 +
 +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
 +              DMA_SUCCESS) {
 +              dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +      if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
 +              dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
 +                      zero_sum_result);
 +              err = -ENODEV;
 +              goto free_resources;
 +      }
 +
 +free_resources:
 +      iop_adma_free_chan_resources(dma_chan);
 +out:
 +      i = ARRAY_SIZE(pq);
 +      while (i--)
 +              __free_page(pq[i]);
 +      return err;
 +}
 +#endif
 +
  static int __devexit iop_adma_remove(struct platform_device *dev)
  {
        struct iop_adma_device *device = platform_get_drvdata(dev);
@@@ -1519,16 -1193,9 +1520,16 @@@ static int __devinit iop_adma_probe(str
                dma_dev->max_xor = iop_adma_get_max_xor();
                dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
        }
 -      if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
 -              dma_dev->device_prep_dma_zero_sum =
 -                      iop_adma_prep_dma_zero_sum;
 +      if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
 +              dma_dev->device_prep_dma_xor_val =
 +                      iop_adma_prep_dma_xor_val;
 +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
 +              dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
 +              dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
 +      }
 +      if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
 +              dma_dev->device_prep_dma_pq_val =
 +                      iop_adma_prep_dma_pq_val;
        if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
                dma_dev->device_prep_dma_interrupt =
                        iop_adma_prep_dma_interrupt;
        }
  
        if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
 -              dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
 -              ret = iop_adma_xor_zero_sum_self_test(adev);
 +          dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
 +              ret = iop_adma_xor_val_self_test(adev);
                dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
                if (ret)
                        goto err_free_iop_chan;
        }
  
 +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
 +          dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
 +              #ifdef CONFIG_MD_RAID6_PQ
 +              ret = iop_adma_pq_zero_sum_self_test(adev);
 +              dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
 +              #else
 +              /* can not test raid6, so do not publish capability */
 +              dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
 +              dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
 +              ret = 0;
 +              #endif
 +              if (ret)
 +                      goto err_free_iop_chan;
 +      }
 +
        dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
 -        "( %s%s%s%s%s%s%s%s%s%s)\n",
 -        dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
 -        dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
 -        dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
 +        "( %s%s%s%s%s%s%s)\n",
 +        dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
 +        dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
          dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
 -        dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
 -        dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
 +        dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
          dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
 -        dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
          dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
          dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
  
@@@ -1642,7 -1297,7 +1643,7 @@@ static void iop_chan_start_null_memcpy(
        if (sw_desc) {
                grp_start = sw_desc->group_head;
  
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_memcpy(grp_start, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@@ -1698,7 -1353,7 +1699,7 @@@ static void iop_chan_start_null_xor(str
        sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
        if (sw_desc) {
                grp_start = sw_desc->group_head;
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_null_xor(grp_start, 2, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
diff --combined drivers/md/Kconfig
index 09c0c6e49ab596f2e5be99bb0fe4bc10bc4b367d,020f9573fd82011babb4ad666a2966a44d088aba..2158377a13593a45938278ac860d5de3db8a06fa
@@@ -124,8 -124,6 +124,8 @@@ config MD_RAID45
        select MD_RAID6_PQ
        select ASYNC_MEMCPY
        select ASYNC_XOR
 +      select ASYNC_PQ
 +      select ASYNC_RAID6_RECOV
        ---help---
          A RAID-5 set of N drives with a capacity of C MB per drive provides
          the capacity of C * (N - 1) MB, and protects against a failure
  
          If unsure, say Y.
  
 +config MULTICORE_RAID456
 +      bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
 +      depends on MD_RAID456
 +      depends on SMP
 +      depends on EXPERIMENTAL
 +      ---help---
 +        Enable the raid456 module to dispatch per-stripe raid operations to a
 +        thread pool.
 +
 +        If unsure, say N.
 +
  config MD_RAID6_PQ
        tristate
  
 +config ASYNC_RAID6_TEST
 +      tristate "Self test for hardware accelerated raid6 recovery"
 +      depends on MD_RAID6_PQ
 +      select ASYNC_RAID6_RECOV
 +      ---help---
 +        This is a one-shot self test that permutes through the
 +        recovery of all the possible two disk failure scenarios for a
 +        N-disk array.  Recovery is performed with the asynchronous
 +        raid6 recovery routines, and will optionally use an offload
 +        engine if one is available.
 +
 +        If unsure, say N.
 +
  config MD_MULTIPATH
        tristate "Multipath I/O support"
        depends on BLK_DEV_MD
@@@ -257,6 -231,17 +257,17 @@@ config DM_MIRRO
           Allow volume managers to mirror logical volumes, also
           needed for live data migration tools such as 'pvmove'.
  
+ config DM_LOG_USERSPACE
+       tristate "Mirror userspace logging (EXPERIMENTAL)"
+       depends on DM_MIRROR && EXPERIMENTAL && NET
+       select CONNECTOR
+       ---help---
+         The userspace logging module provides a mechanism for
+         relaying the dm-dirty-log API to userspace.  Log designs
+         which are more suited to userspace implementation (e.g.
+         shared storage logs) or experimental logs can be implemented
+         by leveraging this framework.
  config DM_ZERO
        tristate "Zero target"
        depends on BLK_DEV_DM
@@@ -275,6 -260,25 +286,25 @@@ config DM_MULTIPAT
        ---help---
          Allow volume managers to support multipath hardware.
  
+ config DM_MULTIPATH_QL
+       tristate "I/O Path Selector based on the number of in-flight I/Os"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path with the least number of in-flight I/Os.
+         If unsure, say N.
+ config DM_MULTIPATH_ST
+       tristate "I/O Path Selector based on the service time"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path expected to complete the incoming I/O in the shortest
+         time.
+         If unsure, say N.
  config DM_DELAY
        tristate "I/O delaying target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
diff --combined drivers/md/raid5.c
index 54ef8d75541d3cc8607e37b0a128305390f3dff5,f9f991e6e1389ffc90860571edf701ca13a6f292..cac6f4d3a14310682f72cc5e2468547519c52b6c
@@@ -47,9 -47,7 +47,9 @@@
  #include <linux/kthread.h>
  #include <linux/raid/pq.h>
  #include <linux/async_tx.h>
 +#include <linux/async.h>
  #include <linux/seq_file.h>
 +#include <linux/cpu.h>
  #include "md.h"
  #include "raid5.h"
  #include "bitmap.h"
@@@ -501,18 -499,11 +501,18 @@@ async_copy_data(int frombio, struct bi
        struct page *bio_page;
        int i;
        int page_offset;
 +      struct async_submit_ctl submit;
 +      enum async_tx_flags flags = 0;
  
        if (bio->bi_sector >= sector)
                page_offset = (signed)(bio->bi_sector - sector) * 512;
        else
                page_offset = (signed)(sector - bio->bi_sector) * -512;
 +
 +      if (frombio)
 +              flags |= ASYNC_TX_FENCE;
 +      init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
 +
        bio_for_each_segment(bvl, bio, i) {
                int len = bio_iovec_idx(bio, i)->bv_len;
                int clen;
                        bio_page = bio_iovec_idx(bio, i)->bv_page;
                        if (frombio)
                                tx = async_memcpy(page, bio_page, page_offset,
 -                                      b_offset, clen,
 -                                      ASYNC_TX_DEP_ACK,
 -                                      tx, NULL, NULL);
 +                                                b_offset, clen, &submit);
                        else
                                tx = async_memcpy(bio_page, page, b_offset,
 -                                      page_offset, clen,
 -                                      ASYNC_TX_DEP_ACK,
 -                                      tx, NULL, NULL);
 +                                                page_offset, clen, &submit);
                }
 +              /* chain the operations */
 +              submit.depend_tx = tx;
 +
                if (clen < len) /* hit end of page */
                        break;
                page_offset +=  len;
@@@ -600,7 -592,6 +600,7 @@@ static void ops_run_biofill(struct stri
  {
        struct dma_async_tx_descriptor *tx = NULL;
        raid5_conf_t *conf = sh->raid_conf;
 +      struct async_submit_ctl submit;
        int i;
  
        pr_debug("%s: stripe %llu\n", __func__,
        }
  
        atomic_inc(&sh->count);
 -      async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
 -              ops_complete_biofill, sh);
 +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
 +      async_trigger_callback(&submit);
  }
  
 -static void ops_complete_compute5(void *stripe_head_ref)
 +static void mark_target_uptodate(struct stripe_head *sh, int target)
  {
 -      struct stripe_head *sh = stripe_head_ref;
 -      int target = sh->ops.target;
 -      struct r5dev *tgt = &sh->dev[target];
 +      struct r5dev *tgt;
  
 -      pr_debug("%s: stripe %llu\n", __func__,
 -              (unsigned long long)sh->sector);
 +      if (target < 0)
 +              return;
  
 +      tgt = &sh->dev[target];
        set_bit(R5_UPTODATE, &tgt->flags);
        BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
        clear_bit(R5_Wantcompute, &tgt->flags);
 +}
 +
 +static void ops_complete_compute(void *stripe_head_ref)
 +{
 +      struct stripe_head *sh = stripe_head_ref;
 +
 +      pr_debug("%s: stripe %llu\n", __func__,
 +              (unsigned long long)sh->sector);
 +
 +      /* mark the computed target(s) as uptodate */
 +      mark_target_uptodate(sh, sh->ops.target);
 +      mark_target_uptodate(sh, sh->ops.target2);
 +
        clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
        if (sh->check_state == check_state_compute_run)
                sh->check_state = check_state_compute_result;
        release_stripe(sh);
  }
  
 -static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 +/* return a pointer to the address conversion region of the scribble buffer
 + *
 + * The region begins sizeof(struct page *) * (sh->disks + 2) past
 + * percpu->scribble, i.e. after room for sh->disks + 2 page pointers.
 + * NOTE(review): this is a byte offset only if scribble is a byte-granular
 + * (e.g. void *) pointer - confirm against struct raid5_percpu.
 + */
 +static addr_conv_t *to_addr_conv(struct stripe_head *sh,
 +                               struct raid5_percpu *percpu)
 +{
 +      return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
 +}
 +
 +static struct dma_async_tx_descriptor *
 +ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
 -      /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
 -      struct page *xor_srcs[disks];
 +      struct page **xor_srcs = percpu->scribble;
        int target = sh->ops.target;
        struct r5dev *tgt = &sh->dev[target];
        struct page *xor_dest = tgt->page;
        int count = 0;
        struct dma_async_tx_descriptor *tx;
 +      struct async_submit_ctl submit;
        int i;
  
        pr_debug("%s: stripe %llu block: %d\n",
  
        atomic_inc(&sh->count);
  
 +      init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
 +                        ops_complete_compute, sh, to_addr_conv(sh, percpu));
        if (unlikely(count == 1))
 -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
 -                      0, NULL, ops_complete_compute5, sh);
 +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 +      else
 +              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 +
 +      return tx;
 +}
 +
 +/* set_syndrome_sources - populate source buffers for gen_syndrome
 + * @srcs - (struct page *) array of size sh->disks
 + * @sh - stripe_head to parse
 + *
 + * Populates srcs in proper layout order for the stripe and returns the
 + * 'count' of sources to be used in a call to async_gen_syndrome.  The P
 + * destination buffer is recorded in srcs[count] and the Q destination
 + * is recorded in srcs[count+1].
 + *
 + * Every entry of srcs is first pointed at raid6_empty_zero_page, so any
 + * slot the walk does not assign still refers to a valid page.  The walk
 + * starts at raid6_d0(sh) and visits every device once, placing each
 + * device's page at the slot returned by raid6_idx_to_slot().  It is a
 + * BUG if the resulting count differs from the number of syndrome disks
 + * (sh->disks for a ddf layout, sh->disks - 2 otherwise).
 + */
 +static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
 +{
 +      int disks = sh->disks;
 +      int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
 +      int d0_idx = raid6_d0(sh);
 +      int count;
 +      int i;
 +
 +      for (i = 0; i < disks; i++)
 +              srcs[i] = (void *)raid6_empty_zero_page;
 +
 +      count = 0; /* only updated through the pointer handed to
 +                  * raid6_idx_to_slot() in the loop below
 +                  */
 +      i = d0_idx;
 +      do {
 +              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 +
 +              srcs[slot] = sh->dev[i].page;
 +              i = raid6_next_disk(i, disks);
 +      } while (i != d0_idx);
 +      BUG_ON(count != syndrome_disks);
 +
 +      return count;
 +}
 +
 +static struct dma_async_tx_descriptor *
 +ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 +{
 +      int disks = sh->disks;
 +      struct page **blocks = percpu->scribble;
 +      int target;
 +      int qd_idx = sh->qd_idx;
 +      struct dma_async_tx_descriptor *tx;
 +      struct async_submit_ctl submit;
 +      struct r5dev *tgt;
 +      struct page *dest;
 +      int i;
 +      int count;
 +
 +      if (sh->ops.target < 0)
 +              target = sh->ops.target2;
 +      else if (sh->ops.target2 < 0)
 +              target = sh->ops.target;
        else
 -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 -                      ASYNC_TX_XOR_ZERO_DST, NULL,
 -                      ops_complete_compute5, sh);
 +              /* we should only have one valid target */
 +              BUG();
 +      BUG_ON(target < 0);
 +      pr_debug("%s: stripe %llu block: %d\n",
 +              __func__, (unsigned long long)sh->sector, target);
 +
 +      tgt = &sh->dev[target];
 +      BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
 +      dest = tgt->page;
 +
 +      atomic_inc(&sh->count);
 +
 +      if (target == qd_idx) {
 +              count = set_syndrome_sources(blocks, sh);
 +              blocks[count] = NULL; /* regenerating p is not necessary */
 +              BUG_ON(blocks[count+1] != dest); /* q should already be set */
 +              init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
 +                                ops_complete_compute, sh,
 +                                to_addr_conv(sh, percpu));
 +              tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
 +      } else {
 +              /* Compute any data- or p-drive using XOR */
 +              count = 0;
 +              for (i = disks; i-- ; ) {
 +                      if (i == target || i == qd_idx)
 +                              continue;
 +                      blocks[count++] = sh->dev[i].page;
 +              }
 +
 +              init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
 +                                NULL, ops_complete_compute, sh,
 +                                to_addr_conv(sh, percpu));
 +              tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
 +      }
  
        return tx;
  }
  
 +static struct dma_async_tx_descriptor *
 +ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 +{
 +      int i, count, disks = sh->disks;
 +      int syndrome_disks = sh->ddf_layout ? disks : disks-2;
 +      int d0_idx = raid6_d0(sh);
 +      int faila = -1, failb = -1;
 +      int target = sh->ops.target;
 +      int target2 = sh->ops.target2;
 +      struct r5dev *tgt = &sh->dev[target];
 +      struct r5dev *tgt2 = &sh->dev[target2];
 +      struct dma_async_tx_descriptor *tx;
 +      struct page **blocks = percpu->scribble;
 +      struct async_submit_ctl submit;
 +
 +      pr_debug("%s: stripe %llu block1: %d block2: %d\n",
 +               __func__, (unsigned long long)sh->sector, target, target2);
 +      BUG_ON(target < 0 || target2 < 0);
 +      BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
 +      BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
 +
 +      /* we need to open-code set_syndrome_sources to handle the
 +       * slot number conversion for 'faila' and 'failb'
 +       *
 +       * Both targets must be valid and flagged R5_Wantcompute (checked
 +       * above).  'faila' and 'failb' become the syndrome slots of the
 +       * two targets and are ordered below so that faila < failb; the
 +       * recovery method is then chosen from which slots are missing:
 +       * P+Q, D+Q, D+P or D+D.  In every case the last operation
 +       * submitted carries ops_complete_compute as its callback and its
 +       * descriptor is the return value.
 +       */
 +      for (i = 0; i < disks ; i++)
 +              blocks[i] = (void *)raid6_empty_zero_page;
 +      count = 0;
 +      i = d0_idx;
 +      do {
 +              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 +
 +              blocks[slot] = sh->dev[i].page;
 +
 +              if (i == target)
 +                      faila = slot;
 +              if (i == target2)
 +                      failb = slot;
 +              i = raid6_next_disk(i, disks);
 +      } while (i != d0_idx);
 +      BUG_ON(count != syndrome_disks);
 +
 +      BUG_ON(faila == failb);
 +      if (failb < faila)
 +              swap(faila, failb);
 +      pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
 +               __func__, (unsigned long long)sh->sector, faila, failb);
 +
 +      atomic_inc(&sh->count);
 +
 +      if (failb == syndrome_disks+1) {
 +              /* Q disk is one of the missing disks */
 +              if (faila == syndrome_disks) {
 +                      /* Missing P+Q, just recompute */
 +                      init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
 +                                        ops_complete_compute, sh,
 +                                        to_addr_conv(sh, percpu));
 +                      return async_gen_syndrome(blocks, 0, count+2,
 +                                                STRIPE_SIZE, &submit);
 +              } else {
 +                      struct page *dest;
 +                      int data_target;
 +                      int qd_idx = sh->qd_idx;
 +
 +                      /* Missing D+Q: recompute D from P, then recompute Q
 +                       *
 +                       * The xor sources are every block except the data
 +                       * target and Q, and that xor is submitted without
 +                       * a callback.  Its descriptor is then handed to
 +                       * init_async_submit() for the gen_syndrome that
 +                       * follows, which carries the completion callback.
 +                       */
 +                      if (target == qd_idx)
 +                              data_target = target2;
 +                      else
 +                              data_target = target;
 +
 +                      count = 0;
 +                      for (i = disks; i-- ; ) {
 +                              if (i == data_target || i == qd_idx)
 +                                      continue;
 +                              blocks[count++] = sh->dev[i].page;
 +                      }
 +                      dest = sh->dev[data_target].page;
 +                      init_async_submit(&submit,
 +                                        ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
 +                                        NULL, NULL, NULL,
 +                                        to_addr_conv(sh, percpu));
 +                      tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
 +                                     &submit);
 +
 +                      count = set_syndrome_sources(blocks, sh);
 +                      init_async_submit(&submit, ASYNC_TX_FENCE, tx,
 +                                        ops_complete_compute, sh,
 +                                        to_addr_conv(sh, percpu));
 +                      return async_gen_syndrome(blocks, 0, count+2,
 +                                                STRIPE_SIZE, &submit);
 +              }
 +      }
 +
 +      init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
 +                        sh, to_addr_conv(sh, percpu));
 +      if (failb == syndrome_disks) {
 +              /* We're missing D+P. */
 +              return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
 +                                             faila, blocks, &submit);
 +      } else {
 +              /* We're missing D+D. */
 +              return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE,
 +                                             faila, failb, blocks, &submit);
 +      }
 +}
 +
 +
  static void ops_complete_prexor(void *stripe_head_ref)
  {
        struct stripe_head *sh = stripe_head_ref;
  }
  
  static struct dma_async_tx_descriptor *
 -ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 +ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
 +             struct dma_async_tx_descriptor *tx)
  {
 -      /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
 -      struct page *xor_srcs[disks];
 +      struct page **xor_srcs = percpu->scribble;
        int count = 0, pd_idx = sh->pd_idx, i;
 +      struct async_submit_ctl submit;
  
        /* existing parity data subtracted */
        struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
                        xor_srcs[count++] = dev->page;
        }
  
 -      tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 -              ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
 -              ops_complete_prexor, sh);
 +      init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
 +                        ops_complete_prexor, sh, to_addr_conv(sh, percpu));
 +      tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
  
        return tx;
  }
@@@ -967,21 -742,17 +967,21 @@@ ops_run_biodrain(struct stripe_head *sh
        return tx;
  }
  
 -static void ops_complete_postxor(void *stripe_head_ref)
 +static void ops_complete_reconstruct(void *stripe_head_ref)
  {
        struct stripe_head *sh = stripe_head_ref;
 -      int disks = sh->disks, i, pd_idx = sh->pd_idx;
 +      int disks = sh->disks;
 +      int pd_idx = sh->pd_idx;
 +      int qd_idx = sh->qd_idx;
 +      int i;
  
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
  
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
 -              if (dev->written || i == pd_idx)
 +
 +              if (dev->written || i == pd_idx || i == qd_idx)
                        set_bit(R5_UPTODATE, &dev->flags);
        }
  
  }
  
  static void
 -ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 +ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
 +                   struct dma_async_tx_descriptor *tx)
  {
 -      /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
 -      struct page *xor_srcs[disks];
 -
 +      struct page **xor_srcs = percpu->scribble;
 +      struct async_submit_ctl submit;
        int count = 0, pd_idx = sh->pd_idx, i;
        struct page *xor_dest;
        int prexor = 0;
         * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
         * for the synchronous xor case
         */
 -      flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
 +      flags = ASYNC_TX_ACK |
                (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
  
        atomic_inc(&sh->count);
  
 -      if (unlikely(count == 1)) {
 -              flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
 -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
 -                      flags, tx, ops_complete_postxor, sh);
 -      } else
 -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 -                      flags, tx, ops_complete_postxor, sh);
 +      init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
 +                        to_addr_conv(sh, percpu));
 +      if (unlikely(count == 1))
 +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 +      else
 +              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 +}
 +/*
 + * ops_run_reconstruct6 - queue syndrome generation for a stripe
 + * @sh: stripe whose pages are passed to async_gen_syndrome()
 + * @percpu: per-cpu data; percpu->scribble is used as the page-pointer list
 + * @tx: descriptor handed to init_async_submit(); raid_run_ops() passes
 + *      whatever its earlier operations returned, which may be NULL
 + *
 + * Called from raid_run_ops() for STRIPE_OP_RECONSTRUCT when conf->level is
 + * not below 6 (ops_run_reconstruct5() handles the other case).  The
 + * descriptor returned by async_gen_syndrome() is not kept;
 + * ops_complete_reconstruct() is registered as the completion callback with
 + * sh as its argument.
 + */
 +static void
 +ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 +                   struct dma_async_tx_descriptor *tx)
 +{
 +      struct async_submit_ctl submit;
 +      struct page **blocks = percpu->scribble;
 +      int count;
 +
 +      pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 +
 +      count = set_syndrome_sources(blocks, sh);
 +      /* NOTE(review): this sh->count reference is presumably dropped by
 +       * ops_complete_reconstruct() -- its tail is not visible here; confirm.
 +       */
 +      atomic_inc(&sh->count);
 +      /* count+2: presumably the two slots after the sources are the P and Q
 +       * destinations -- confirm against set_syndrome_sources().
 +       */
 +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
 +                        sh, to_addr_conv(sh, percpu));
 +      async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
  }
  
  static void ops_complete_check(void *stripe_head_ref)
        release_stripe(sh);
  }
  
 -static void ops_run_check(struct stripe_head *sh)
 +static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
 -      /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
 -      struct page *xor_srcs[disks];
 +      int pd_idx = sh->pd_idx;
 +      int qd_idx = sh->qd_idx;
 +      struct page *xor_dest;
 +      struct page **xor_srcs = percpu->scribble;
        struct dma_async_tx_descriptor *tx;
 -
 -      int count = 0, pd_idx = sh->pd_idx, i;
 -      struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
 +      struct async_submit_ctl submit;
 +      int count;
 +      int i;
  
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
  
 +      count = 0;
 +      xor_dest = sh->dev[pd_idx].page;
 +      xor_srcs[count++] = xor_dest;
        for (i = disks; i--; ) {
 -              struct r5dev *dev = &sh->dev[i];
 -              if (i != pd_idx)
 -                      xor_srcs[count++] = dev->page;
 +              if (i == pd_idx || i == qd_idx)
 +                      continue;
 +              xor_srcs[count++] = sh->dev[i].page;
        }
  
 -      tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 -              &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
 +      init_async_submit(&submit, 0, NULL, NULL, NULL,
 +                        to_addr_conv(sh, percpu));
 +      tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 +                         &sh->ops.zero_sum_result, &submit);
  
        atomic_inc(&sh->count);
 -      tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
 -              ops_complete_check, sh);
 +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
 +      tx = async_trigger_callback(&submit);
  }
  
 -static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 +static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
 +{
 +      struct page **srcs = percpu->scribble;
 +      struct async_submit_ctl submit;
 +      int count;
 +      /* Submit async_syndrome_val() over the pages that
 +       * set_syndrome_sources() lists for this stripe.  The outcome is
 +       * written to sh->ops.zero_sum_result and ops_complete_check() runs on
 +       * completion.  raid_run_ops() passes checkp = 0 for check_state_run_q
 +       * and checkp = 1 for check_state_run_pq.
 +       */
 +      pr_debug("%s: stripe %llu checkp: %d\n", __func__,
 +              (unsigned long long)sh->sector, checkp);
 +      /* With checkp == 0 the slot at index 'count' is cleared before
 +       * submission.  NOTE(review): presumably that slot is P, so only Q is
 +       * validated -- confirm against set_syndrome_sources() and
 +       * async_syndrome_val().
 +       */
 +      count = set_syndrome_sources(srcs, sh);
 +      if (!checkp)
 +              srcs[count] = NULL;
 +      /* take a stripe reference before the operation is submitted */
 +      atomic_inc(&sh->count);
 +      init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
 +                        sh, to_addr_conv(sh, percpu));
 +      async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
 +                         &sh->ops.zero_sum_result, percpu->spare_page, &submit);
 +}
 +
 +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  {
        int overlap_clear = 0, i, disks = sh->disks;
        struct dma_async_tx_descriptor *tx = NULL;
 +      raid5_conf_t *conf = sh->raid_conf;
 +      int level = conf->level;
 +      struct raid5_percpu *percpu;
 +      unsigned long cpu;
  
 +      cpu = get_cpu();
 +      percpu = per_cpu_ptr(conf->percpu, cpu);
        if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                ops_run_biofill(sh);
                overlap_clear++;
        }
  
        if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
 -              tx = ops_run_compute5(sh);
 -              /* terminate the chain if postxor is not set to be run */
 -              if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
 +              if (level < 6)
 +                      tx = ops_run_compute5(sh, percpu);
 +              else {
 +                      if (sh->ops.target2 < 0 || sh->ops.target < 0)
 +                              tx = ops_run_compute6_1(sh, percpu);
 +                      else
 +                              tx = ops_run_compute6_2(sh, percpu);
 +              }
 +              /* terminate the chain if reconstruct is not set to be run */
 +              if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
                        async_tx_ack(tx);
        }
  
        if (test_bit(STRIPE_OP_PREXOR, &ops_request))
 -              tx = ops_run_prexor(sh, tx);
 +              tx = ops_run_prexor(sh, percpu, tx);
  
        if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                tx = ops_run_biodrain(sh, tx);
                overlap_clear++;
        }
  
 -      if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
 -              ops_run_postxor(sh, tx);
 +      if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
 +              if (level < 6)
 +                      ops_run_reconstruct5(sh, percpu, tx);
 +              else
 +                      ops_run_reconstruct6(sh, percpu, tx);
 +      }
  
 -      if (test_bit(STRIPE_OP_CHECK, &ops_request))
 -              ops_run_check(sh);
 +      if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
 +              if (sh->check_state == check_state_run)
 +                      ops_run_check_p(sh, percpu);
 +              else if (sh->check_state == check_state_run_q)
 +                      ops_run_check_pq(sh, percpu, 0);
 +              else if (sh->check_state == check_state_run_pq)
 +                      ops_run_check_pq(sh, percpu, 1);
 +              else
 +                      BUG();
 +      }
  
        if (overlap_clear)
                for (i = disks; i--; ) {
                        if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                wake_up(&sh->raid_conf->wait_for_overlap);
                }
 +      put_cpu();
  }
  
  static int grow_one_stripe(raid5_conf_t *conf)
@@@ -1248,28 -948,6 +1248,28 @@@ static int grow_stripes(raid5_conf_t *c
        return 0;
  }
  
 +/**
 + * scribble_len - return the required size of the scribble region
 + * @num: total number of disks in the array
 + *
 + * The size must be enough to contain:
 + * 1/ a struct page pointer for each device in the array +2
 + * 2/ room to convert each entry in (1) to its corresponding dma
 + *    (dma_map_page()) or page (page_address()) address.
 + *
 + * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
 + * calculate over all devices (not just the data blocks), using zeros in place
 + * of the P and Q blocks.
 + *
 + * Return: the size in bytes, i.e. (num + 2) struct page pointers plus
 + * (num + 2) addr_conv_t entries.  resize_stripes() stores this value in
 + * conf->scribble_len and allocates one buffer of that size per present cpu.
 + */
 +static size_t scribble_len(int num)
 +{
 +      size_t len;
 +
 +      len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
 +
 +      return len;
 +}
 +
  static int resize_stripes(raid5_conf_t *conf, int newsize)
  {
        /* Make all the stripes able to hold 'newsize' devices.
        struct stripe_head *osh, *nsh;
        LIST_HEAD(newstripes);
        struct disk_info *ndisks;
 +      unsigned long cpu;
        int err;
        struct kmem_cache *sc;
        int i;
        /* Step 3.
         * At this point, we are holding all the stripes so the array
         * is completely stalled, so now is a good time to resize
 -       * conf->disks.
 +       * conf->disks and the scribble region
         */
        ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
        if (ndisks) {
        } else
                err = -ENOMEM;
  
 +      get_online_cpus();
 +      conf->scribble_len = scribble_len(newsize);
 +      for_each_present_cpu(cpu) {
 +              struct raid5_percpu *percpu;
 +              void *scribble;
 +
 +              percpu = per_cpu_ptr(conf->percpu, cpu);
 +              scribble = kmalloc(conf->scribble_len, GFP_NOIO);
 +
 +              if (scribble) {
 +                      kfree(percpu->scribble);
 +                      percpu->scribble = scribble;
 +              } else {
 +                      err = -ENOMEM;
 +                      break;
 +              }
 +      }
 +      put_online_cpus();
 +
        /* Step 4, return new stripes to service */
        while(!list_empty(&newstripes)) {
                nsh = list_entry(newstripes.next, struct stripe_head, lru);
                list_del_init(&nsh->lru);
 +
                for (i=conf->raid_disks; i < newsize; i++)
                        if (nsh->dev[i].page == NULL) {
                                struct page *p = alloc_page(GFP_NOIO);
@@@ -1617,8 -1274,8 +1617,8 @@@ static sector_t raid5_compute_sector(ra
        sector_t new_sector;
        int algorithm = previous ? conf->prev_algo
                                 : conf->algorithm;
-       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
-                                        : (conf->chunk_size >> 9);
+       int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+                                        : conf->chunk_sectors;
        int raid_disks = previous ? conf->previous_raid_disks
                                  : conf->raid_disks;
        int data_disks = raid_disks - conf->max_degraded;
@@@ -1823,8 -1480,8 +1823,8 @@@ static sector_t compute_blocknr(struct 
        int raid_disks = sh->disks;
        int data_disks = raid_disks - conf->max_degraded;
        sector_t new_sector = sh->sector, check;
-       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
-                                        : (conf->chunk_size >> 9);
+       int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+                                        : conf->chunk_sectors;
        int algorithm = previous ? conf->prev_algo
                                 : conf->algorithm;
        sector_t stripe;
  }
  
  
 -
 -/*
 - * Copy data between a page in the stripe cache, and one or more bion
 - * The page could align with the middle of the bio, or there could be
 - * several bion, each with several bio_vecs, which cover part of the page
 - * Multiple bion are linked together on bi_next.  There may be extras
 - * at the end of this list.  We ignore them.
 - */
 -static void copy_data(int frombio, struct bio *bio,
 -                   struct page *page,
 -                   sector_t sector)
 -{
 -      char *pa = page_address(page);
 -      struct bio_vec *bvl;
 -      int i;
 -      int page_offset;
 -
 -      if (bio->bi_sector >= sector)
 -              page_offset = (signed)(bio->bi_sector - sector) * 512;
 -      else
 -              page_offset = (signed)(sector - bio->bi_sector) * -512;
 -      bio_for_each_segment(bvl, bio, i) {
 -              int len = bio_iovec_idx(bio,i)->bv_len;
 -              int clen;
 -              int b_offset = 0;
 -
 -              if (page_offset < 0) {
 -                      b_offset = -page_offset;
 -                      page_offset += b_offset;
 -                      len -= b_offset;
 -              }
 -
 -              if (len > 0 && page_offset + len > STRIPE_SIZE)
 -                      clen = STRIPE_SIZE - page_offset;
 -              else clen = len;
 -
 -              if (clen > 0) {
 -                      char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
 -                      if (frombio)
 -                              memcpy(pa+page_offset, ba+b_offset, clen);
 -                      else
 -                              memcpy(ba+b_offset, pa+page_offset, clen);
 -                      __bio_kunmap_atomic(ba, KM_USER0);
 -              }
 -              if (clen < len) /* hit end of page */
 -                      break;
 -              page_offset +=  len;
 -      }
 -}
 -
 -#define check_xor()   do {                                              \
 -                              if (count == MAX_XOR_BLOCKS) {            \
 -                              xor_blocks(count, STRIPE_SIZE, dest, ptr);\
 -                              count = 0;                                \
 -                         }                                              \
 -                      } while(0)
 -
 -static void compute_parity6(struct stripe_head *sh, int method)
 -{
 -      raid5_conf_t *conf = sh->raid_conf;
 -      int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
 -      int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
 -      struct bio *chosen;
 -      /**** FIX THIS: This could be very bad if disks is close to 256 ****/
 -      void *ptrs[syndrome_disks+2];
 -
 -      pd_idx = sh->pd_idx;
 -      qd_idx = sh->qd_idx;
 -      d0_idx = raid6_d0(sh);
 -
 -      pr_debug("compute_parity, stripe %llu, method %d\n",
 -              (unsigned long long)sh->sector, method);
 -
 -      switch(method) {
 -      case READ_MODIFY_WRITE:
 -              BUG();          /* READ_MODIFY_WRITE N/A for RAID-6 */
 -      case RECONSTRUCT_WRITE:
 -              for (i= disks; i-- ;)
 -                      if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
 -                              chosen = sh->dev[i].towrite;
 -                              sh->dev[i].towrite = NULL;
 -
 -                              if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 -                                      wake_up(&conf->wait_for_overlap);
 -
 -                              BUG_ON(sh->dev[i].written);
 -                              sh->dev[i].written = chosen;
 -                      }
 -              break;
 -      case CHECK_PARITY:
 -              BUG();          /* Not implemented yet */
 -      }
 -
 -      for (i = disks; i--;)
 -              if (sh->dev[i].written) {
 -                      sector_t sector = sh->dev[i].sector;
 -                      struct bio *wbi = sh->dev[i].written;
 -                      while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
 -                              copy_data(1, wbi, sh->dev[i].page, sector);
 -                              wbi = r5_next_bio(wbi, sector);
 -                      }
 -
 -                      set_bit(R5_LOCKED, &sh->dev[i].flags);
 -                      set_bit(R5_UPTODATE, &sh->dev[i].flags);
 -              }
 -
 -      /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
 -
 -      for (i = 0; i < disks; i++)
 -              ptrs[i] = (void *)raid6_empty_zero_page;
 -
 -      count = 0;
 -      i = d0_idx;
 -      do {
 -              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 -
 -              ptrs[slot] = page_address(sh->dev[i].page);
 -              if (slot < syndrome_disks &&
 -                  !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
 -                      printk(KERN_ERR "block %d/%d not uptodate "
 -                             "on parity calc\n", i, count);
 -                      BUG();
 -              }
 -
 -              i = raid6_next_disk(i, disks);
 -      } while (i != d0_idx);
 -      BUG_ON(count != syndrome_disks);
 -
 -      raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
 -
 -      switch(method) {
 -      case RECONSTRUCT_WRITE:
 -              set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
 -              set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
 -              set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
 -              set_bit(R5_LOCKED,   &sh->dev[qd_idx].flags);
 -              break;
 -      case UPDATE_PARITY:
 -              set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
 -              set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
 -              break;
 -      }
 -}
 -
 -
 -/* Compute one missing block */
 -static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
 -{
 -      int i, count, disks = sh->disks;
 -      void *ptr[MAX_XOR_BLOCKS], *dest, *p;
 -      int qd_idx = sh->qd_idx;
 -
 -      pr_debug("compute_block_1, stripe %llu, idx %d\n",
 -              (unsigned long long)sh->sector, dd_idx);
 -
 -      if ( dd_idx == qd_idx ) {
 -              /* We're actually computing the Q drive */
 -              compute_parity6(sh, UPDATE_PARITY);
 -      } else {
 -              dest = page_address(sh->dev[dd_idx].page);
 -              if (!nozero) memset(dest, 0, STRIPE_SIZE);
 -              count = 0;
 -              for (i = disks ; i--; ) {
 -                      if (i == dd_idx || i == qd_idx)
 -                              continue;
 -                      p = page_address(sh->dev[i].page);
 -                      if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
 -                              ptr[count++] = p;
 -                      else
 -                              printk("compute_block() %d, stripe %llu, %d"
 -                                     " not present\n", dd_idx,
 -                                     (unsigned long long)sh->sector, i);
 -
 -                      check_xor();
 -              }
 -              if (count)
 -                      xor_blocks(count, STRIPE_SIZE, dest, ptr);
 -              if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
 -              else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
 -      }
 -}
 -
 -/* Compute two missing blocks */
 -static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 -{
 -      int i, count, disks = sh->disks;
 -      int syndrome_disks = sh->ddf_layout ? disks : disks-2;
 -      int d0_idx = raid6_d0(sh);
 -      int faila = -1, failb = -1;
 -      /**** FIX THIS: This could be very bad if disks is close to 256 ****/
 -      void *ptrs[syndrome_disks+2];
 -
 -      for (i = 0; i < disks ; i++)
 -              ptrs[i] = (void *)raid6_empty_zero_page;
 -      count = 0;
 -      i = d0_idx;
 -      do {
 -              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
 -
 -              ptrs[slot] = page_address(sh->dev[i].page);
 -
 -              if (i == dd_idx1)
 -                      faila = slot;
 -              if (i == dd_idx2)
 -                      failb = slot;
 -              i = raid6_next_disk(i, disks);
 -      } while (i != d0_idx);
 -      BUG_ON(count != syndrome_disks);
 -
 -      BUG_ON(faila == failb);
 -      if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
 -
 -      pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
 -               (unsigned long long)sh->sector, dd_idx1, dd_idx2,
 -               faila, failb);
 -
 -      if (failb == syndrome_disks+1) {
 -              /* Q disk is one of the missing disks */
 -              if (faila == syndrome_disks) {
 -                      /* Missing P+Q, just recompute */
 -                      compute_parity6(sh, UPDATE_PARITY);
 -                      return;
 -              } else {
 -                      /* We're missing D+Q; recompute D from P */
 -                      compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
 -                                           dd_idx2 : dd_idx1),
 -                                      0);
 -                      compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
 -                      return;
 -              }
 -      }
 -
 -      /* We're missing D+P or D+D; */
 -      if (failb == syndrome_disks) {
 -              /* We're missing D+P. */
 -              raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
 -      } else {
 -              /* We're missing D+D. */
 -              raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
 -                                ptrs);
 -      }
 -
 -      /* Both the above update both missing blocks */
 -      set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
 -      set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
 -}
 -
  static void
 -schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
 +schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         int rcw, int expand)
  {
        int i, pd_idx = sh->pd_idx, disks = sh->disks;
 +      raid5_conf_t *conf = sh->raid_conf;
 +      int level = conf->level;
  
        if (rcw) {
                /* if we are not expanding this is a proper write request, and
                } else
                        sh->reconstruct_state = reconstruct_state_run;
  
 -              set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
 +              set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
  
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                                s->locked++;
                        }
                }
 -              if (s->locked + 1 == disks)
 +              if (s->locked + conf->max_degraded == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 -                              atomic_inc(&sh->raid_conf->pending_full_writes);
 +                              atomic_inc(&conf->pending_full_writes);
        } else {
 +              BUG_ON(level == 6);
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
  
                sh->reconstruct_state = reconstruct_state_prexor_drain_run;
                set_bit(STRIPE_OP_PREXOR, &s->ops_request);
                set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
 -              set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
 +              set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
  
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                }
        }
  
 -      /* keep the parity disk locked while asynchronous operations
 +      /* keep the parity disk(s) locked while asynchronous operations
         * are in flight
         */
        set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
        clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
        s->locked++;
  
 +      if (level == 6) {
 +              int qd_idx = sh->qd_idx;
 +              struct r5dev *dev = &sh->dev[qd_idx];
 +
 +              set_bit(R5_LOCKED, &dev->flags);
 +              clear_bit(R5_UPTODATE, &dev->flags);
 +              s->locked++;
 +      }
 +
        pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                __func__, (unsigned long long)sh->sector,
                s->locked, s->ops_request);
@@@ -2094,12 -1986,18 +2094,11 @@@ static int add_stripe_bio(struct stripe
  
  static void end_reshape(raid5_conf_t *conf);
  
 -static int page_is_zero(struct page *p)
 -{
 -      char *a = page_address(p);
 -      return ((*(u32*)a) == 0 &&
 -              memcmp(a, a+4, STRIPE_SIZE-4)==0);
 -}
 -
  static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                            struct stripe_head *sh)
  {
        int sectors_per_chunk =
-               previous ? (conf->prev_chunk >> 9)
-                        : (conf->chunk_size >> 9);
+               previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
        int dd_idx;
        int chunk_offset = sector_div(stripe, sectors_per_chunk);
        int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
@@@ -2234,10 -2132,9 +2233,10 @@@ static int fetch_block5(struct stripe_h
                        set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                        set_bit(R5_Wantcompute, &dev->flags);
                        sh->ops.target = disk_idx;
 +                      sh->ops.target2 = -1;
                        s->req_compute = 1;
                        /* Careful: from this point on 'uptodate' is in the eye
 -                       * of raid5_run_ops which services 'compute' operations
 +                       * of raid_run_ops which services 'compute' operations
                         * before writes. R5_Wantcompute flags a block that will
                         * be R5_UPTODATE by the time it is needed for a
                         * subsequent operation.
@@@ -2276,104 -2173,61 +2275,104 @@@ static void handle_stripe_fill5(struct 
        set_bit(STRIPE_HANDLE, &sh->state);
  }
  
 -static void handle_stripe_fill6(struct stripe_head *sh,
 -                      struct stripe_head_state *s, struct r6_state *r6s,
 -                      int disks)
 +/* fetch_block6 - checks the given member device to see if its data needs
 + * to be read or computed to satisfy a request.
 + *
 + * Returns 1 when no more member devices need to be checked, otherwise returns
 + * 0 to tell the loop in handle_stripe_fill6 to continue
 + */
 +static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
 +                       struct r6_state *r6s, int disk_idx, int disks)
  {
 -      int i;
 -      for (i = disks; i--; ) {
 -              struct r5dev *dev = &sh->dev[i];
 -              if (!test_bit(R5_LOCKED, &dev->flags) &&
 -                  !test_bit(R5_UPTODATE, &dev->flags) &&
 -                  (dev->toread || (dev->towrite &&
 -                   !test_bit(R5_OVERWRITE, &dev->flags)) ||
 -                   s->syncing || s->expanding ||
 -                   (s->failed >= 1 &&
 -                    (sh->dev[r6s->failed_num[0]].toread ||
 -                     s->to_write)) ||
 -                   (s->failed >= 2 &&
 -                    (sh->dev[r6s->failed_num[1]].toread ||
 -                     s->to_write)))) {
 -                      /* we would like to get this block, possibly
 -                       * by computing it, but we might not be able to
 +      struct r5dev *dev = &sh->dev[disk_idx];
 +      struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
 +                                &sh->dev[r6s->failed_num[1]] };
 +
 +      if (!test_bit(R5_LOCKED, &dev->flags) &&
 +          !test_bit(R5_UPTODATE, &dev->flags) &&
 +          (dev->toread ||
 +           (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
 +           s->syncing || s->expanding ||
 +           (s->failed >= 1 &&
 +            (fdev[0]->toread || s->to_write)) ||
 +           (s->failed >= 2 &&
 +            (fdev[1]->toread || s->to_write)))) {
 +              /* we would like to get this block, possibly by computing it,
 +               * otherwise read it if the backing disk is insync
 +               */
 +              BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
 +              BUG_ON(test_bit(R5_Wantread, &dev->flags));
 +              if ((s->uptodate == disks - 1) &&
 +                  (s->failed && (disk_idx == r6s->failed_num[0] ||
 +                                 disk_idx == r6s->failed_num[1]))) {
 +                      /* have disk failed, and we're requested to fetch it;
 +                       * do compute it
                         */
 -                      if ((s->uptodate == disks - 1) &&
 -                          (s->failed && (i == r6s->failed_num[0] ||
 -                                         i == r6s->failed_num[1]))) {
 -                              pr_debug("Computing stripe %llu block %d\n",
 -                                     (unsigned long long)sh->sector, i);
 -                              compute_block_1(sh, i, 0);
 -                              s->uptodate++;
 -                      } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
 -                              /* Computing 2-failure is *very* expensive; only
 -                               * do it if failed >= 2
 -                               */
 -                              int other;
 -                              for (other = disks; other--; ) {
 -                                      if (other == i)
 -                                              continue;
 -                                      if (!test_bit(R5_UPTODATE,
 -                                            &sh->dev[other].flags))
 -                                              break;
 -                              }
 -                              BUG_ON(other < 0);
 -                              pr_debug("Computing stripe %llu blocks %d,%d\n",
 -                                     (unsigned long long)sh->sector,
 -                                     i, other);
 -                              compute_block_2(sh, i, other);
 -                              s->uptodate += 2;
 -                      } else if (test_bit(R5_Insync, &dev->flags)) {
 -                              set_bit(R5_LOCKED, &dev->flags);
 -                              set_bit(R5_Wantread, &dev->flags);
 -                              s->locked++;
 -                              pr_debug("Reading block %d (sync=%d)\n",
 -                                      i, s->syncing);
 +                      pr_debug("Computing stripe %llu block %d\n",
 +                             (unsigned long long)sh->sector, disk_idx);
 +                      set_bit(STRIPE_COMPUTE_RUN, &sh->state);
 +                      set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
 +                      set_bit(R5_Wantcompute, &dev->flags);
 +                      sh->ops.target = disk_idx;
 +                      sh->ops.target2 = -1; /* no 2nd target */
 +                      s->req_compute = 1;
 +                      s->uptodate++;
 +                      return 1;
 +              } else if (s->uptodate == disks-2 && s->failed >= 2) {
 +                      /* Computing 2-failure is *very* expensive; only
 +                       * do it if failed >= 2
 +                       */
 +                      int other;
 +                      for (other = disks; other--; ) {
 +                              if (other == disk_idx)
 +                                      continue;
 +                              if (!test_bit(R5_UPTODATE,
 +                                    &sh->dev[other].flags))
 +                                      break;
                        }
 +                      BUG_ON(other < 0);
 +                      pr_debug("Computing stripe %llu blocks %d,%d\n",
 +                             (unsigned long long)sh->sector,
 +                             disk_idx, other);
 +                      set_bit(STRIPE_COMPUTE_RUN, &sh->state);
 +                      set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
 +                      set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
 +                      set_bit(R5_Wantcompute, &sh->dev[other].flags);
 +                      sh->ops.target = disk_idx;
 +                      sh->ops.target2 = other;
 +                      s->uptodate += 2;
 +                      s->req_compute = 1;
 +                      return 1;
 +              } else if (test_bit(R5_Insync, &dev->flags)) {
 +                      set_bit(R5_LOCKED, &dev->flags);
 +                      set_bit(R5_Wantread, &dev->flags);
 +                      s->locked++;
 +                      pr_debug("Reading block %d (sync=%d)\n",
 +                              disk_idx, s->syncing);
                }
        }
 +
 +      return 0;
 +}
 +
 +/**
 + * handle_stripe_fill6 - read or compute data to satisfy pending requests.
 + * @sh: stripe to examine
 + * @s: stripe state, passed through to fetch_block6()
 + * @r6s: raid6 state, passed through to fetch_block6()
 + * @disks: number of member devices to walk
 + *
 + * Walks the member devices from index disks-1 down to 0 and stops at the
 + * first one for which fetch_block6() returns non-zero.  The walk is skipped
 + * entirely when STRIPE_COMPUTE_RUN is set in sh->state or when
 + * sh->check_state or sh->reconstruct_state is non-zero.  STRIPE_HANDLE is
 + * set in sh->state in every case.
 + */
 +static void handle_stripe_fill6(struct stripe_head *sh,
 +                      struct stripe_head_state *s, struct r6_state *r6s,
 +                      int disks)
 +{
 +      int i;
 +
 +      /* look for blocks to read/compute, skip this if a compute
 +       * is already in flight, if sh->check_state is set, or if the
 +       * stripe contents are in the midst of changing due to a write
 +       * (sh->reconstruct_state set)
 +       */
 +      if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
 +          !sh->reconstruct_state)
 +              for (i = disks; i--; )
 +                      if (fetch_block6(sh, s, r6s, i, disks))
 +                              break;
        set_bit(STRIPE_HANDLE, &sh->state);
  }
  
@@@ -2507,61 -2361,114 +2506,61 @@@ static void handle_stripe_dirtying5(rai
         */
        /* since handle_stripe can be called at any time we need to handle the
         * case where a compute block operation has been submitted and then a
 -       * subsequent call wants to start a write request.  raid5_run_ops only
 -       * handles the case where compute block and postxor are requested
 +       * subsequent call wants to start a write request.  raid_run_ops only
 +       * handles the case where compute block and reconstruct are requested
         * simultaneously.  If this is not the case then new writes need to be
         * held off until the compute completes.
         */
        if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
            (s->locked == 0 && (rcw == 0 || rmw == 0) &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)))
 -              schedule_reconstruction5(sh, s, rcw == 0, 0);
 +              schedule_reconstruction(sh, s, rcw == 0, 0);
  }
  
  /* handle_stripe_dirtying6 - decide whether a pending write on this stripe
   * can be scheduled.
   *
   * In the merged ('+') version below: rcw counts the data blocks that are
   * still missing for a reconstruct-write.  For each such block on an
   * in-sync device a read is requested when STRIPE_PREREAD_ACTIVE is set,
   * otherwise the stripe is marked STRIPE_DELAYED.  Once nothing is locked
   * and rcw is 0, schedule_reconstruction() is called.  The '-' lines are
   * the removed synchronous implementation.
   */
  static void handle_stripe_dirtying6(raid5_conf_t *conf,
                struct stripe_head *sh, struct stripe_head_state *s,
                struct r6_state *r6s, int disks)
  {
 -      int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
 +      int rcw = 0, pd_idx = sh->pd_idx, i;
        int qd_idx = sh->qd_idx;
 +
 +      set_bit(STRIPE_HANDLE, &sh->state);
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
 -              /* Would I have to read this buffer for reconstruct_write */
 -              if (!test_bit(R5_OVERWRITE, &dev->flags)
 -                  && i != pd_idx && i != qd_idx
 -                  && (!test_bit(R5_LOCKED, &dev->flags)
 -                          ) &&
 -                  !test_bit(R5_UPTODATE, &dev->flags)) {
 -                      if (test_bit(R5_Insync, &dev->flags)) rcw++;
 -                      else {
 -                              pr_debug("raid6: must_compute: "
 -                                      "disk %d flags=%#lx\n", i, dev->flags);
 -                              must_compute++;
 -                      }
 -              }
 -      }
 -      pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
 -             (unsigned long long)sh->sector, rcw, must_compute);
 -      set_bit(STRIPE_HANDLE, &sh->state);
 -
 -      if (rcw > 0)
 -              /* want reconstruct write, but need to get some data */
 -              for (i = disks; i--; ) {
 -                      struct r5dev *dev = &sh->dev[i];
 -                      if (!test_bit(R5_OVERWRITE, &dev->flags)
 -                          && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
 -                          && !test_bit(R5_LOCKED, &dev->flags) &&
 -                          !test_bit(R5_UPTODATE, &dev->flags) &&
 -                          test_bit(R5_Insync, &dev->flags)) {
 -                              if (
 -                                test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 -                                      pr_debug("Read_old stripe %llu "
 -                                              "block %d for Reconstruct\n",
 -                                           (unsigned long long)sh->sector, i);
 -                                      set_bit(R5_LOCKED, &dev->flags);
 -                                      set_bit(R5_Wantread, &dev->flags);
 -                                      s->locked++;
 -                              } else {
 -                                      pr_debug("Request delayed stripe %llu "
 -                                              "block %d for Reconstruct\n",
 -                                           (unsigned long long)sh->sector, i);
 -                                      set_bit(STRIPE_DELAYED, &sh->state);
 -                                      set_bit(STRIPE_HANDLE, &sh->state);
 -                              }
 +              /* check if we haven't enough data: a block that is not P
 +               * or Q, is not marked R5_OVERWRITE, is not locked, and is
 +               * neither uptodate nor already scheduled for compute is
 +               * counted in rcw
 +               */
 +              if (!test_bit(R5_OVERWRITE, &dev->flags) &&
 +                  i != pd_idx && i != qd_idx &&
 +                  !test_bit(R5_LOCKED, &dev->flags) &&
 +                  !(test_bit(R5_UPTODATE, &dev->flags) ||
 +                    test_bit(R5_Wantcompute, &dev->flags))) {
 +                      rcw++;
 +                      if (!test_bit(R5_Insync, &dev->flags))
 +                              continue; /* it's a failed drive */
 +
 +                      if (
 +                        test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 +                              pr_debug("Read_old stripe %llu "
 +                                      "block %d for Reconstruct\n",
 +                                   (unsigned long long)sh->sector, i);
 +                              set_bit(R5_LOCKED, &dev->flags);
 +                              set_bit(R5_Wantread, &dev->flags);
 +                              s->locked++;
 +                      } else {
 +                              pr_debug("Request delayed stripe %llu "
 +                                      "block %d for Reconstruct\n",
 +                                   (unsigned long long)sh->sector, i);
 +                              set_bit(STRIPE_DELAYED, &sh->state);
 +                              set_bit(STRIPE_HANDLE, &sh->state);
                        }
                }
 +      }
        /* now if nothing is locked, and if we have enough data, we can start a
         * write request.  In the merged version the write is also held off
         * while STRIPE_COMPUTE_RUN is set unless s->req_compute is set, and
         * while STRIPE_BIT_DELAY is set.
         */
 -      if (s->locked == 0 && rcw == 0 &&
 +      if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
 +          s->locked == 0 && rcw == 0 &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
 -              if (must_compute > 0) {
 -                      /* We have failed blocks and need to compute them */
 -                      switch (s->failed) {
 -                      case 0:
 -                              BUG();
 -                      case 1:
 -                              compute_block_1(sh, r6s->failed_num[0], 0);
 -                              break;
 -                      case 2:
 -                              compute_block_2(sh, r6s->failed_num[0],
 -                                              r6s->failed_num[1]);
 -                              break;
 -                      default: /* This request should have been failed? */
 -                              BUG();
 -                      }
 -              }
 -
 -              pr_debug("Computing parity for stripe %llu\n",
 -                      (unsigned long long)sh->sector);
 -              compute_parity6(sh, RECONSTRUCT_WRITE);
 -              /* now every locked buffer is ready to be written */
 -              for (i = disks; i--; )
 -                      if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
 -                              pr_debug("Writing stripe %llu block %d\n",
 -                                     (unsigned long long)sh->sector, i);
 -                              s->locked++;
 -                              set_bit(R5_Wantwrite, &sh->dev[i].flags);
 -                      }
 -              if (s->locked == disks)
 -                      if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 -                              atomic_inc(&conf->pending_full_writes);
 -              /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
 -              set_bit(STRIPE_INSYNC, &sh->state);
 -
 -              if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 -                      atomic_dec(&conf->preread_active_stripes);
 -                      if (atomic_read(&conf->preread_active_stripes) <
 -                          IO_THRESHOLD)
 -                              md_wakeup_thread(conf->mddev->thread);
 -              }
 +              schedule_reconstruction(sh, s, 1, 0);
        }
  }
  
@@@ -2620,7 -2527,7 +2619,7 @@@ static void handle_parity_checks5(raid5
                 * we are done.  Otherwise update the mismatch count and repair
                 * parity if !MD_RECOVERY_CHECK
                 */
 -              if (sh->ops.zero_sum_result == 0)
 +              if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
                        /* parity is correct (on disc,
                         * not in buffer any more)
                         */
                                set_bit(R5_Wantcompute,
                                        &sh->dev[sh->pd_idx].flags);
                                sh->ops.target = sh->pd_idx;
 +                              sh->ops.target2 = -1;
                                s->uptodate++;
                        }
                }
  
  
  static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 -                              struct stripe_head_state *s,
 -                              struct r6_state *r6s, struct page *tmp_page,
 -                              int disks)
 +                                struct stripe_head_state *s,
 +                                struct r6_state *r6s, int disks)
  {
 -      int update_p = 0, update_q = 0;
 -      struct r5dev *dev;
        int pd_idx = sh->pd_idx;
        int qd_idx = sh->qd_idx;
 +      struct r5dev *dev;
  
        set_bit(STRIPE_HANDLE, &sh->state);
  
        BUG_ON(s->failed > 2);
 -      BUG_ON(s->uptodate < disks);
 +
        /* Want to check and possibly repair P and Q.
         * However there could be one 'failed' device, in which
         * case we can only check one of them, possibly using the
         * other to generate missing data
         */
  
 -      /* If !tmp_page, we cannot do the calculations,
 -       * but as we have set STRIPE_HANDLE, we will soon be called
 -       * by stripe_handle with a tmp_page - just wait until then.
 -       */
 -      if (tmp_page) {
 +      switch (sh->check_state) {
 +      case check_state_idle:
 +              /* start a new check operation if there are < 2 failures */
                if (s->failed == r6s->q_failed) {
 -                      /* The only possible failed device holds 'Q', so it
 +                      /* The only possible failed device holds Q, so it
                         * makes sense to check P (If anything else were failed,
                         * we would have used P to recreate it).
                         */
 -                      compute_block_1(sh, pd_idx, 1);
 -                      if (!page_is_zero(sh->dev[pd_idx].page)) {
 -                              compute_block_1(sh, pd_idx, 0);
 -                              update_p = 1;
 -                      }
 +                      sh->check_state = check_state_run;
                }
                if (!r6s->q_failed && s->failed < 2) {
 -                      /* q is not failed, and we didn't use it to generate
 +                      /* Q is not failed, and we didn't use it to generate
                         * anything, so it makes sense to check it
                         */
 -                      memcpy(page_address(tmp_page),
 -                             page_address(sh->dev[qd_idx].page),
 -                             STRIPE_SIZE);
 -                      compute_parity6(sh, UPDATE_PARITY);
 -                      if (memcmp(page_address(tmp_page),
 -                                 page_address(sh->dev[qd_idx].page),
 -                                 STRIPE_SIZE) != 0) {
 -                              clear_bit(STRIPE_INSYNC, &sh->state);
 -                              update_q = 1;
 -                      }
 +                      if (sh->check_state == check_state_run)
 +                              sh->check_state = check_state_run_pq;
 +                      else
 +                              sh->check_state = check_state_run_q;
                }
 -              if (update_p || update_q) {
 -                      conf->mddev->resync_mismatches += STRIPE_SECTORS;
 -                      if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 -                              /* don't try to repair!! */
 -                              update_p = update_q = 0;
 +
 +              /* discard potentially stale zero_sum_result */
 +              sh->ops.zero_sum_result = 0;
 +
 +              if (sh->check_state == check_state_run) {
 +                      /* async_xor_zero_sum destroys the contents of P */
 +                      clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
 +                      s->uptodate--;
 +              }
 +              if (sh->check_state >= check_state_run &&
 +                  sh->check_state <= check_state_run_pq) {
 +                      /* async_syndrome_zero_sum preserves P and Q, so
 +                       * no need to mark them !uptodate here
 +                       */
 +                      set_bit(STRIPE_OP_CHECK, &s->ops_request);
 +                      break;
                }
  
 +              /* we have 2-disk failure */
 +              BUG_ON(s->failed != 2);
 +              /* fall through */
 +      case check_state_compute_result:
 +              sh->check_state = check_state_idle;
 +
 +              /* check that a write has not made the stripe insync */
 +              if (test_bit(STRIPE_INSYNC, &sh->state))
 +                      break;
 +
                /* now write out any block on a failed drive,
 -               * or P or Q if they need it
 +               * or P or Q if they were recomputed
                 */
 -
 +              BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
                if (s->failed == 2) {
                        dev = &sh->dev[r6s->failed_num[1]];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
 -
 -              if (update_p) {
 +              if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
                        dev = &sh->dev[pd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
 -              if (update_q) {
 +              if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
                        dev = &sh->dev[qd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                clear_bit(STRIPE_DEGRADED, &sh->state);
  
                set_bit(STRIPE_INSYNC, &sh->state);
 +              break;
 +      case check_state_run:
 +      case check_state_run_q:
 +      case check_state_run_pq:
 +              break; /* we will be called again upon completion */
 +      case check_state_check_result:
 +              sh->check_state = check_state_idle;
 +
 +              /* handle a successful check operation, if parity is correct
 +               * we are done.  Otherwise update the mismatch count and repair
 +               * parity if !MD_RECOVERY_CHECK
 +               */
 +              if (sh->ops.zero_sum_result == 0) {
 +                      /* both parities are correct */
 +                      if (!s->failed)
 +                              set_bit(STRIPE_INSYNC, &sh->state);
 +                      else {
 +                              /* in contrast to the raid5 case we can validate
 +                               * parity, but still have a failure to write
 +                               * back
 +                               */
 +                              sh->check_state = check_state_compute_result;
 +                              /* Returning at this point means that we may go
 +                               * off and bring p and/or q uptodate again so
 +                               * we make sure to check zero_sum_result again
 +                               * to verify if p or q need writeback
 +                               */
 +                      }
 +              } else {
 +                      conf->mddev->resync_mismatches += STRIPE_SECTORS;
 +                      if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
 +                              /* don't try to repair!! */
 +                              set_bit(STRIPE_INSYNC, &sh->state);
 +                      else {
 +                              int *target = &sh->ops.target;
 +
 +                              sh->ops.target = -1;
 +                              sh->ops.target2 = -1;
 +                              sh->check_state = check_state_compute_run;
 +                              set_bit(STRIPE_COMPUTE_RUN, &sh->state);
 +                              set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
 +                              if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
 +                                      set_bit(R5_Wantcompute,
 +                                              &sh->dev[pd_idx].flags);
 +                                      *target = pd_idx;
 +                                      target = &sh->ops.target2;
 +                                      s->uptodate++;
 +                              }
 +                              if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
 +                                      set_bit(R5_Wantcompute,
 +                                              &sh->dev[qd_idx].flags);
 +                                      *target = qd_idx;
 +                                      s->uptodate++;
 +                              }
 +                      }
 +              }
 +              break;
 +      case check_state_compute_run:
 +              break;
 +      default:
 +              printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
 +                     __func__, sh->check_state,
 +                     (unsigned long long) sh->sector);
 +              BUG();
        }
  }
  
@@@ -2830,7 -2666,6 +2829,7 @@@ static void handle_stripe_expansion(rai
                if (i != sh->pd_idx && i != sh->qd_idx) {
                        int dd_idx, j;
                        struct stripe_head *sh2;
 +                      struct async_submit_ctl submit;
  
                        sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
                        }
  
                        /* place all the copies on one channel */
 +                      init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
                        tx = async_memcpy(sh2->dev[dd_idx].page,
 -                              sh->dev[i].page, 0, 0, STRIPE_SIZE,
 -                              ASYNC_TX_DEP_ACK, tx, NULL, NULL);
 +                                        sh->dev[i].page, 0, 0, STRIPE_SIZE,
 +                                        &submit);
  
                        set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
                        set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@@ -3139,7 -2973,7 +3138,7 @@@ static bool handle_stripe5(struct strip
                /* Need to write out all blocks after computing parity */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
 -              schedule_reconstruction5(sh, &s, 1, 1);
 +              schedule_reconstruction(sh, &s, 1, 1);
        } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
  
        if (s.ops_request)
 -              raid5_run_ops(sh, s.ops_request);
 +              raid_run_ops(sh, s.ops_request);
  
        ops_run_io(sh, &s);
  
        return blocked_rdev == NULL;
  }
  
 -static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 +static bool handle_stripe6(struct stripe_head *sh)
  {
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks;
        mdk_rdev_t *blocked_rdev = NULL;
  
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
 -              "pd_idx=%d, qd_idx=%d\n",
 +              "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
               (unsigned long long)sh->sector, sh->state,
 -             atomic_read(&sh->count), pd_idx, qd_idx);
 +             atomic_read(&sh->count), pd_idx, qd_idx,
 +             sh->check_state, sh->reconstruct_state);
        memset(&s, 0, sizeof(s));
  
        spin_lock(&sh->lock);
  
                pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
                        i, dev->flags, dev->toread, dev->towrite, dev->written);
 -              /* maybe we can reply to a read */
 -              if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
 -                      struct bio *rbi, *rbi2;
 -                      pr_debug("Return read for disc %d\n", i);
 -                      spin_lock_irq(&conf->device_lock);
 -                      rbi = dev->toread;
 -                      dev->toread = NULL;
 -                      if (test_and_clear_bit(R5_Overlap, &dev->flags))
 -                              wake_up(&conf->wait_for_overlap);
 -                      spin_unlock_irq(&conf->device_lock);
 -                      while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
 -                              copy_data(0, rbi, dev->page, dev->sector);
 -                              rbi2 = r5_next_bio(rbi, dev->sector);
 -                              spin_lock_irq(&conf->device_lock);
 -                              if (!raid5_dec_bi_phys_segments(rbi)) {
 -                                      rbi->bi_next = return_bi;
 -                                      return_bi = rbi;
 -                              }
 -                              spin_unlock_irq(&conf->device_lock);
 -                              rbi = rbi2;
 -                      }
 -              }
 +              /* maybe we can reply to a read
 +               *
 +               * new wantfill requests are only permitted while
 +               * ops_complete_biofill is guaranteed to be inactive
 +               */
 +              if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
 +                  !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
 +                      set_bit(R5_Wantfill, &dev->flags);
  
                /* now count some things */
                if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
                if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
 +              if (test_bit(R5_Wantcompute, &dev->flags))
 +                      BUG_ON(++s.compute > 2);
  
 -
 -              if (dev->toread)
 +              if (test_bit(R5_Wantfill, &dev->flags)) {
 +                      s.to_fill++;
 +              } else if (dev->toread)
                        s.to_read++;
                if (dev->towrite) {
                        s.to_write++;
                blocked_rdev = NULL;
        }
  
 +      if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
 +              set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
 +              set_bit(STRIPE_BIOFILL_RUN, &sh->state);
 +      }
 +
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
         * or to load a block that is being partially written.
         */
        if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
 -          (s.syncing && (s.uptodate < disks)) || s.expanding)
 +          (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
                handle_stripe_fill6(sh, &s, &r6s, disks);
  
 -      /* now to consider writing and what else, if anything should be read */
 -      if (s.to_write)
 +      /* Now we check to see if any write operations have recently
 +       * completed
 +       */
 +      if (sh->reconstruct_state == reconstruct_state_drain_result) {
 +              int qd_idx = sh->qd_idx;
 +
 +              sh->reconstruct_state = reconstruct_state_idle;
 +              /* All the 'written' buffers and the parity blocks are ready to
 +               * be written back to disk
 +               */
 +              BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
 +              BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
 +              for (i = disks; i--; ) {
 +                      dev = &sh->dev[i];
 +                      if (test_bit(R5_LOCKED, &dev->flags) &&
 +                          (i == sh->pd_idx || i == qd_idx ||
 +                           dev->written)) {
 +                              pr_debug("Writing block %d\n", i);
 +                              BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
 +                              set_bit(R5_Wantwrite, &dev->flags);
 +                              if (!test_bit(R5_Insync, &dev->flags) ||
 +                                  ((i == sh->pd_idx || i == qd_idx) &&
 +                                    s.failed == 0))
 +                                      set_bit(STRIPE_INSYNC, &sh->state);
 +                      }
 +              }
 +              if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 +                      atomic_dec(&conf->preread_active_stripes);
 +                      if (atomic_read(&conf->preread_active_stripes) <
 +                              IO_THRESHOLD)
 +                              md_wakeup_thread(conf->mddev->thread);
 +              }
 +      }
 +
 +      /* Now to consider new write requests and what else, if anything
 +       * should be read.  We do not handle new writes when:
 +       * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
 +       * 2/ A 'check' operation is in flight, as it may clobber the parity
 +       *    block.
 +       */
 +      if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
  
        /* maybe we need to check and possibly fix the parity for this stripe
         * Any reads will already have been scheduled, so we just see if enough
 -       * data is available
 +       * data is available.  The parity check is held off while parity
 +       * dependent operations are in flight.
         */
 -      if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
 -              handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
 +      if (sh->check_state ||
 +          (s.syncing && s.locked == 0 &&
 +           !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
 +           !test_bit(STRIPE_INSYNC, &sh->state)))
 +              handle_parity_checks6(conf, sh, &s, &r6s, disks);
  
        if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS,1);
                                        set_bit(R5_Wantwrite, &dev->flags);
                                        set_bit(R5_ReWrite, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
 +                                      s.locked++;
                                } else {
                                        /* let's read it back */
                                        set_bit(R5_Wantread, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
 +                                      s.locked++;
                                }
                        }
                }
  
 -      if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 +      /* Finish reconstruct operations initiated by the expansion process */
 +      if (sh->reconstruct_state == reconstruct_state_result) {
 +              sh->reconstruct_state = reconstruct_state_idle;
 +              clear_bit(STRIPE_EXPANDING, &sh->state);
 +              for (i = conf->raid_disks; i--; ) {
 +                      set_bit(R5_Wantwrite, &sh->dev[i].flags);
 +                      set_bit(R5_LOCKED, &sh->dev[i].flags);
 +                      s.locked++;
 +              }
 +      }
 +
 +      if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
 +          !sh->reconstruct_state) {
                struct stripe_head *sh2
                        = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
                /* Need to write out all blocks after computing P&Q */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
 -              compute_parity6(sh, RECONSTRUCT_WRITE);
 -              for (i = conf->raid_disks ; i-- ;  ) {
 -                      set_bit(R5_LOCKED, &sh->dev[i].flags);
 -                      s.locked++;
 -                      set_bit(R5_Wantwrite, &sh->dev[i].flags);
 -              }
 -              clear_bit(STRIPE_EXPANDING, &sh->state);
 -      } else if (s.expanded) {
 +              schedule_reconstruction(sh, &s, 1, 1);
 +      } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                wake_up(&conf->wait_for_overlap);
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
  
 +      if (s.ops_request)
 +              raid_run_ops(sh, s.ops_request);
 +
        ops_run_io(sh, &s);
  
        return_io(return_bi);
  }
  
  /* returns true if the stripe was handled
   *
   * Dispatches on the array level: level 6 is passed to handle_stripe6(),
   * every other level to handle_stripe5().  The result of the chosen
   * handler is returned unchanged.
   */
 -static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 +static bool handle_stripe(struct stripe_head *sh)
  {
        if (sh->raid_conf->level == 6)
 -              return handle_stripe6(sh, tmp_page);
 +              return handle_stripe6(sh);
        else
                return handle_stripe5(sh);
  }
  
 -
 -
  static void raid5_activate_delayed(raid5_conf_t *conf)
  {
        if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@@ -3496,7 -3283,7 +3495,7 @@@ static void activate_bit_delay(raid5_co
  
  static void unplug_slaves(mddev_t *mddev)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        int i;
  
        rcu_read_lock();
  static void raid5_unplug_device(struct request_queue *q)
  {
        mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        unsigned long flags;
  
        spin_lock_irqsave(&conf->device_lock, flags);
  static int raid5_congested(void *data, int bits)
  {
        mddev_t *mddev = data;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
  
        /* No difference between reads and writes.  Just check
         * how busy the stripe_cache is
@@@ -3564,14 -3351,14 +3563,14 @@@ static int raid5_mergeable_bvec(struct 
        mddev_t *mddev = q->queuedata;
        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
        int max;
-       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bvm->bi_size >> 9;
  
        if ((bvm->bi_rw & 1) == WRITE)
                return biovec->bv_len; /* always allow writes to be mergeable */
  
-       if (mddev->new_chunk < mddev->chunk_size)
-               chunk_sectors = mddev->new_chunk >> 9;
+       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+               chunk_sectors = mddev->new_chunk_sectors;
        max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
        if (max < 0) max = 0;
        if (max <= biovec->bv_len && bio_sectors == 0)
  /* in_chunk_boundary - test whether a bio fits inside a single chunk.
   *
   * Returns non-zero when the bio's offset within its chunk plus its length
   * in sectors does not exceed the chunk size.  The smaller of the current
   * and the new chunk size is used.  The offset is taken with
   * "sector & (chunk_sectors - 1)", which presumably relies on
   * chunk_sectors being a power of two -- NOTE(review): confirm.
   */
  static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
  {
        sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
-       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bio->bi_size >> 9;
  
-       if (mddev->new_chunk < mddev->chunk_size)
-               chunk_sectors = mddev->new_chunk >> 9;
+       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+               chunk_sectors = mddev->new_chunk_sectors;
        return  chunk_sectors >=
                ((sector & (chunk_sectors - 1)) + bio_sectors);
  }
@@@ -3652,7 -3439,7 +3651,7 @@@ static void raid5_align_endio(struct bi
        bio_put(bi);
  
        mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
-       conf = mddev_to_conf(mddev);
+       conf = mddev->private;
        rdev = (void*)raid_bi->bi_next;
        raid_bi->bi_next = NULL;
  
@@@ -3675,10 -3462,10 +3674,10 @@@ static int bio_fits_rdev(struct bio *bi
  {
        struct request_queue *q = bdev_get_queue(bi->bi_bdev);
  
-       if ((bi->bi_size>>9) > q->max_sectors)
+       if ((bi->bi_size>>9) > queue_max_sectors(q))
                return 0;
        blk_recount_segments(q, bi);
-       if (bi->bi_phys_segments > q->max_phys_segments)
+       if (bi->bi_phys_segments > queue_max_phys_segments(q))
                return 0;
  
        if (q->merge_bvec_fn)
  static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
  {
        mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        unsigned int dd_idx;
        struct bio* align_bi;
        mdk_rdev_t *rdev;
@@@ -3811,7 -3598,7 +3810,7 @@@ static struct stripe_head *__get_priori
  static int make_request(struct request_queue *q, struct bio * bi)
  {
        mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        int dd_idx;
        sector_t new_sector;
        sector_t logical_sector, last_sector;
                                spin_unlock_irq(&conf->device_lock);
                                if (must_retry) {
                                        release_stripe(sh);
+                                       schedule();
                                        goto retry;
                                }
                        }
@@@ -4003,10 -3791,10 +4003,10 @@@ static sector_t reshape_request(mddev_
         * If old and new chunk sizes differ, we need to process the
         * largest of these
         */
-       if (mddev->new_chunk > mddev->chunk_size)
-               reshape_sectors = mddev->new_chunk / 512;
+       if (mddev->new_chunk_sectors > mddev->chunk_sectors)
+               reshape_sectors = mddev->new_chunk_sectors;
        else
-               reshape_sectors = mddev->chunk_size / 512;
+               reshape_sectors = mddev->chunk_sectors;
  
        /* we update the metadata when there is more than 3Meg
         * in the block range (that is rather arbitrary, should
                                     1, &dd_idx, NULL);
        last_sector =
                raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
-                                           *(new_data_disks) - 1),
+                                           * new_data_disks - 1),
                                     1, &dd_idx, NULL);
        if (last_sector >= mddev->dev_sectors)
                last_sector = mddev->dev_sectors - 1;
                wait_event(conf->wait_for_overlap,
                           atomic_read(&conf->reshape_stripes) == 0);
                mddev->reshape_position = conf->reshape_progress;
-               mddev->curr_resync_completed = mddev->curr_resync;
+               mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
@@@ -4258,7 -4046,7 +4258,7 @@@ static inline sector_t sync_request(mdd
        spin_unlock(&sh->lock);
  
        /* wait for any blocked device to be handled */
 -      while(unlikely(!handle_stripe(sh, NULL)))
 +      while (unlikely(!handle_stripe(sh)))
                ;
        release_stripe(sh);
  
@@@ -4315,7 -4103,7 +4315,7 @@@ static int  retry_aligned_read(raid5_co
                        return handled;
                }
  
 -              handle_stripe(sh, NULL);
 +              handle_stripe(sh);
                release_stripe(sh);
                handled++;
        }
        return handled;
  }
  
 +#ifdef CONFIG_MULTICORE_RAID456
 +/*
 + * Callback handed to async_schedule_domain(): @param is the stripe_head to
 + * handle and then release.  @cookie is not used.
 + */
 +static void __process_stripe(void *param, async_cookie_t cookie)
 +{
 +      struct stripe_head *sh = param;
 +
 +      handle_stripe(sh);
 +      release_stripe(sh);
 +}
 +
 +/* Queue @sh on @domain so that __process_stripe() handles and releases it. */
 +static void process_stripe(struct stripe_head *sh, struct list_head *domain)
 +{
 +      async_schedule_domain(__process_stripe, sh, domain);
 +}
 +
 +/* Delegates to async_synchronize_full_domain() for @domain. */
 +static void synchronize_stripe_processing(struct list_head *domain)
 +{
 +      async_synchronize_full_domain(domain);
 +}
 +#else
 +/*
 + * Without CONFIG_MULTICORE_RAID456 the stripe is handled and released
 + * directly in the caller, followed by cond_resched().  @domain is unused.
 + */
 +static void process_stripe(struct stripe_head *sh, struct list_head *domain)
 +{
 +      handle_stripe(sh);
 +      release_stripe(sh);
 +      cond_resched();
 +}
 +
 +/* Nothing was queued in this configuration, so there is nothing to do. */
 +static void synchronize_stripe_processing(struct list_head *domain)
 +{
 +}
 +#endif
  
  
  /*
  static void raid5d(mddev_t *mddev)
  {
        struct stripe_head *sh;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        int handled;
 +      LIST_HEAD(raid_domain);
  
        pr_debug("+++ raid5d active\n");
  
                spin_unlock_irq(&conf->device_lock);
                
                handled++;
 -              handle_stripe(sh, conf->spare_page);
 -              release_stripe(sh);
 +              process_stripe(sh, &raid_domain);
  
                spin_lock_irq(&conf->device_lock);
        }
  
        spin_unlock_irq(&conf->device_lock);
  
 +      synchronize_stripe_processing(&raid_domain);
        async_tx_issue_pending_all();
        unplug_slaves(mddev);
  
  static ssize_t
  raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        if (conf)
                return sprintf(page, "%d\n", conf->max_nr_stripes);
        else
  static ssize_t
  raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        unsigned long new;
        int err;
  
@@@ -4476,7 -4233,7 +4476,7 @@@ raid5_stripecache_size = __ATTR(stripe_
  static ssize_t
  raid5_show_preread_threshold(mddev_t *mddev, char *page)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        if (conf)
                return sprintf(page, "%d\n", conf->bypass_threshold);
        else
  static ssize_t
  raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        unsigned long new;
        if (len >= PAGE_SIZE)
                return -EINVAL;
@@@ -4510,7 -4267,7 +4510,7 @@@ raid5_preread_bypass_threshold = __ATTR
  static ssize_t
  stripe_cache_active_show(mddev_t *mddev, char *page)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        if (conf)
                return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
        else
@@@ -4534,7 -4291,7 +4534,7 @@@ static struct attribute_group raid5_att
  static sector_t
  raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
  
        if (!sectors)
                sectors = mddev->dev_sectors;
                        raid_disks = conf->previous_raid_disks;
        }
  
-       sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
-       sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+       sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+       sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
        return sectors * (raid_disks - conf->max_degraded);
  }
  
 +static void raid5_free_percpu(raid5_conf_t *conf)
 +{
 +      struct raid5_percpu *percpu;
 +      unsigned long cpu;
 +
 +      if (!conf->percpu)
 +              return;
 +
 +      get_online_cpus();
 +      for_each_possible_cpu(cpu) {
 +              percpu = per_cpu_ptr(conf->percpu, cpu);
 +              safe_put_page(percpu->spare_page);
 +              kfree(percpu->scribble);
 +      }
 +#ifdef CONFIG_HOTPLUG_CPU
 +      unregister_cpu_notifier(&conf->cpu_notify);
 +#endif
 +      put_online_cpus();
 +
 +      free_percpu(conf->percpu);
 +}
 +
 +/*
 + * Tear down a raid5_conf_t: shrink the stripe cache, release the per-cpu
 + * state, then free the disks array, the stripe hash table and @conf itself.
 + * @conf must not be used after this returns.
 + */
 +static void free_conf(raid5_conf_t *conf)
 +{
 +      shrink_stripes(conf);
 +      raid5_free_percpu(conf);
 +      kfree(conf->disks);
 +      kfree(conf->stripe_hashtbl);
 +      kfree(conf);
 +}
 +
 +#ifdef CONFIG_HOTPLUG_CPU
 +static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 +                            void *hcpu)
 +{
 +      raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
 +      long cpu = (long)hcpu;
 +      struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
 +
 +      switch (action) {
 +      case CPU_UP_PREPARE:
 +      case CPU_UP_PREPARE_FROZEN:
 +              if (conf->level == 6 && !percpu->spare_page)
 +                      percpu->spare_page = alloc_page(GFP_KERNEL);
 +              if (!percpu->scribble)
 +                      percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
 +
 +              if (!percpu->scribble ||
 +                  (conf->level == 6 && !percpu->spare_page)) {
 +                      safe_put_page(percpu->spare_page);
 +                      kfree(percpu->scribble);
 +                      pr_err("%s: failed memory allocation for cpu%ld\n",
 +                             __func__, cpu);
 +                      return NOTIFY_BAD;
 +              }
 +              break;
 +      case CPU_DEAD:
 +      case CPU_DEAD_FROZEN:
 +              safe_put_page(percpu->spare_page);
 +              kfree(percpu->scribble);
 +              percpu->spare_page = NULL;
 +              percpu->scribble = NULL;
 +              break;
 +      default:
 +              break;
 +      }
 +      return NOTIFY_OK;
 +}
 +#endif
 +
 +/*
 + * Allocate the per-cpu scratch state for @conf and store it in
 + * @conf->percpu.  Every present cpu gets a scribble buffer and, when
 + * @conf->level is 6, a spare page.  With CONFIG_HOTPLUG_CPU the hotplug
 + * notifier is registered as well, but only if all allocations succeeded.
 + *
 + * Returns 0 on success, -ENOMEM when an allocation fails, or the error from
 + * register_cpu_notifier().  On failure after the initial alloc_percpu() the
 + * partially populated state is left in @conf->percpu; setup_conf()'s abort
 + * path releases it through free_conf().
 + */
 +static int raid5_alloc_percpu(raid5_conf_t *conf)
 +{
 +      unsigned long cpu;
 +      struct page *spare_page;
 +      struct raid5_percpu *allcpus;
 +      void *scribble;
 +      int err;
 +
 +      allcpus = alloc_percpu(struct raid5_percpu);
 +      if (!allcpus)
 +              return -ENOMEM;
 +      conf->percpu = allcpus;
 +
 +      get_online_cpus();
 +      err = 0;
 +      for_each_present_cpu(cpu) {
 +              /* only level 6 gets a spare page */
 +              if (conf->level == 6) {
 +                      spare_page = alloc_page(GFP_KERNEL);
 +                      if (!spare_page) {
 +                              err = -ENOMEM;
 +                              break;
 +                      }
 +                      per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
 +              }
 +              scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
 +              if (!scribble) {
 +                      err = -ENOMEM;
 +                      break;
 +              }
 +              per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
 +      }
 +#ifdef CONFIG_HOTPLUG_CPU
 +      /* the notifier fields are always filled in; registration is conditional */
 +      conf->cpu_notify.notifier_call = raid456_cpu_notify;
 +      conf->cpu_notify.priority = 0;
 +      if (err == 0)
 +              err = register_cpu_notifier(&conf->cpu_notify);
 +#endif
 +      put_online_cpus();
 +
 +      return err;
 +}
 +
  static raid5_conf_t *setup_conf(mddev_t *mddev)
  {
        raid5_conf_t *conf;
                return ERR_PTR(-EINVAL);
        }
  
-       if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+       if (!mddev->new_chunk_sectors ||
+           (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
+           !is_power_of_2(mddev->new_chunk_sectors)) {
                printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
-                       mddev->new_chunk, mdname(mddev));
+                      mddev->new_chunk_sectors << 9, mdname(mddev));
                return ERR_PTR(-EINVAL);
        }
  
                goto abort;
  
        conf->raid_disks = mddev->raid_disks;
 +      conf->scribble_len = scribble_len(conf->raid_disks);
        if (mddev->reshape_position == MaxSector)
                conf->previous_raid_disks = mddev->raid_disks;
        else
        if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                goto abort;
  
 -      if (mddev->new_level == 6) {
 -              conf->spare_page = alloc_page(GFP_KERNEL);
 -              if (!conf->spare_page)
 -                      goto abort;
 -      }
 +      conf->level = mddev->new_level;
 +      if (raid5_alloc_percpu(conf) != 0)
 +              goto abort;
 +
        spin_lock_init(&conf->device_lock);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
                        conf->fullsync = 1;
        }
  
-       conf->chunk_size = mddev->new_chunk;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
+       conf->level = mddev->new_level;
        if (conf->level == 6)
                conf->max_degraded = 2;
        else
        conf->max_nr_stripes = NR_STRIPES;
        conf->reshape_progress = mddev->reshape_position;
        if (conf->reshape_progress != MaxSector) {
-               conf->prev_chunk = mddev->chunk_size;
+               conf->prev_chunk_sectors = mddev->chunk_sectors;
                conf->prev_algo = mddev->layout;
        }
  
  
   abort:
        if (conf) {
 -              shrink_stripes(conf);
 -              safe_put_page(conf->spare_page);
 -              kfree(conf->disks);
 -              kfree(conf->stripe_hashtbl);
 -              kfree(conf);
 +              free_conf(conf);
                return ERR_PTR(-EIO);
        } else
                return ERR_PTR(-ENOMEM);
@@@ -4803,6 -4455,10 +4806,10 @@@ static int run(mddev_t *mddev
        int working_disks = 0;
        mdk_rdev_t *rdev;
  
+       if (mddev->recovery_cp != MaxSector)
+               printk(KERN_NOTICE "raid5: %s is not clean"
+                      " -- starting background reconstruction\n",
+                      mdname(mddev));
        if (mddev->reshape_position != MaxSector) {
                /* Check that we can continue the reshape.
                 * Currently only disks can change, it must
                 * geometry.
                 */
                here_new = mddev->reshape_position;
-               if (sector_div(here_new, (mddev->new_chunk>>9)*
+               if (sector_div(here_new, mddev->new_chunk_sectors *
                               (mddev->raid_disks - max_degraded))) {
                        printk(KERN_ERR "raid5: reshape_position not "
                               "on a stripe boundary\n");
                }
                /* here_new is the stripe we will write to */
                here_old = mddev->reshape_position;
-               sector_div(here_old, (mddev->chunk_size>>9)*
+               sector_div(here_old, mddev->chunk_sectors *
                           (old_disks-max_degraded));
                /* here_old is the first stripe that we might need to read
                 * from */
        } else {
                BUG_ON(mddev->level != mddev->new_level);
                BUG_ON(mddev->layout != mddev->new_layout);
-               BUG_ON(mddev->chunk_size != mddev->new_chunk);
+               BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
                BUG_ON(mddev->delta_disks != 0);
        }
  
        }
  
        /* device size must be a multiple of chunk size */
-       mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+       mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
        mddev->resync_max_sectors = mddev->dev_sectors;
  
        if (mddev->degraded > 0 &&
        {
                int data_disks = conf->previous_raid_disks - conf->max_degraded;
                int stripe = data_disks *
-                       (mddev->chunk_size / PAGE_SIZE);
+                       ((mddev->chunk_sectors << 9) / PAGE_SIZE);
                if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                        mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
        }
@@@ -4957,8 -4613,12 +4964,8 @@@ abort
        md_unregister_thread(mddev->thread);
        mddev->thread = NULL;
        if (conf) {
 -              shrink_stripes(conf);
                print_raid5_conf(conf);
 -              safe_put_page(conf->spare_page);
 -              kfree(conf->disks);
 -              kfree(conf->stripe_hashtbl);
 -              kfree(conf);
 +              free_conf(conf);
        }
        mddev->private = NULL;
        printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@@@ -4973,10 -4633,13 +4980,10 @@@ static int stop(mddev_t *mddev
  
        md_unregister_thread(mddev->thread);
        mddev->thread = NULL;
 -      shrink_stripes(conf);
 -      kfree(conf->stripe_hashtbl);
        mddev->queue->backing_dev_info.congested_fn = NULL;
        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
        sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 -      kfree(conf->disks);
 -      kfree(conf);
 +      free_conf(conf);
        mddev->private = NULL;
        return 0;
  }
@@@ -5021,7 -4684,8 +5028,8 @@@ static void status(struct seq_file *seq
        raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
        int i;
  
-       seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
+       seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
+               mddev->chunk_sectors / 2, mddev->layout);
        seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
        for (i = 0; i < conf->raid_disks; i++)
                seq_printf (seq, "%s",
@@@ -5169,7 -4833,7 +5177,7 @@@ static int raid5_resize(mddev_t *mddev
         * any io in the removed space completes, but it hardly seems
         * worth it.
         */
-       sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+       sectors &= ~((sector_t)mddev->chunk_sectors - 1);
        md_set_array_sectors(mddev, raid5_size(mddev, sectors,
                                               mddev->raid_disks));
        if (mddev->array_sectors >
        return 0;
  }
  
- static int raid5_check_reshape(mddev_t *mddev)
+ /*
+  * Returns 1 when the stripe cache is large enough for a reshape involving
+  * both the current and the new chunk size, 0 (after logging a warning)
+  * when it is not.
+  */
+ static int check_stripe_cache(mddev_t *mddev)
+ {
+       /* Can only proceed if there are plenty of stripe_heads.
+        * We need a minimum of one full stripe, and for sensible progress
+        * it is best to have about 4 times that.
+        * If we require 4 times, then the default 256 4K stripe_heads will
+        * allow for chunk sizes up to 256K, which is probably OK.
+        * If the chunk size is greater, user-space should request more
+        * stripe_heads first.
+        */
+       raid5_conf_t *conf = mddev->private;
+       if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+           > conf->max_nr_stripes ||
+           ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+           > conf->max_nr_stripes) {
+               printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
+                      ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
+                       / STRIPE_SIZE)*4);
+               return 0;
+       }
+       return 1;
+ }
+ static int check_reshape(mddev_t *mddev)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
  
        if (mddev->delta_disks == 0 &&
            mddev->new_layout == mddev->layout &&
-           mddev->new_chunk == mddev->chunk_size)
-               return -EINVAL; /* nothing to do */
+           mddev->new_chunk_sectors == mddev->chunk_sectors)
+               return 0; /* nothing to do */
        if (mddev->bitmap)
                /* Cannot grow a bitmap yet */
                return -EBUSY;
                        return -EINVAL;
        }
  
-       /* Can only proceed if there are plenty of stripe_heads.
-        * We need a minimum of one full stripe,, and for sensible progress
-        * it is best to have about 4 times that.
-        * If we require 4 times, then the default 256 4K stripe_heads will
-        * allow for chunk sizes up to 256K, which is probably OK.
-        * If the chunk size is greater, user-space should request more
-        * stripe_heads first.
-        */
-       if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
-           (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
-               printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
-                      (max(mddev->chunk_size, mddev->new_chunk)
-                       / STRIPE_SIZE)*4);
+       if (!check_stripe_cache(mddev))
                return -ENOSPC;
-       }
  
        return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
  }
  
  static int raid5_start_reshape(mddev_t *mddev)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
        mdk_rdev_t *rdev;
        int spares = 0;
        int added_devices = 0;
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                return -EBUSY;
  
+       if (!check_stripe_cache(mddev))
+               return -ENOSPC;
        list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk < 0 &&
                    !test_bit(Faulty, &rdev->flags))
        spin_lock_irq(&conf->device_lock);
        conf->previous_raid_disks = conf->raid_disks;
        conf->raid_disks += mddev->delta_disks;
-       conf->prev_chunk = conf->chunk_size;
-       conf->chunk_size = mddev->new_chunk;
+       conf->prev_chunk_sectors = conf->chunk_sectors;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
        conf->prev_algo = conf->algorithm;
        conf->algorithm = mddev->new_layout;
        if (mddev->delta_disks < 0)
@@@ -5351,7 -5028,7 +5372,7 @@@ static void end_reshape(raid5_conf_t *c
                 */
                {
                        int data_disks = conf->raid_disks - conf->max_degraded;
-                       int stripe = data_disks * (conf->chunk_size
+                       int stripe = data_disks * ((conf->chunk_sectors << 9)
                                                   / PAGE_SIZE);
                        if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                                conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
  static void raid5_finish_reshape(mddev_t *mddev)
  {
        struct block_device *bdev;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
  
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
  
                                raid5_remove_disk(mddev, d);
                }
                mddev->layout = conf->algorithm;
-               mddev->chunk_size = conf->chunk_size;
+               mddev->chunk_sectors = conf->chunk_sectors;
                mddev->reshape_position = MaxSector;
                mddev->delta_disks = 0;
        }
  
  static void raid5_quiesce(mddev_t *mddev, int state)
  {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
  
        switch(state) {
        case 2: /* resume for a suspend */
@@@ -5454,7 -5131,7 +5475,7 @@@ static void *raid5_takeover_raid1(mddev
  
        mddev->new_level = 5;
        mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
-       mddev->new_chunk = chunksect << 9;
+       mddev->new_chunk_sectors = chunksect;
  
        return setup_conf(mddev);
  }
@@@ -5493,24 -5170,24 +5514,24 @@@ static void *raid5_takeover_raid6(mddev
  }
  
  
- static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+ static int raid5_check_reshape(mddev_t *mddev)
  {
        /* For a 2-drive array, the layout and chunk size can be changed
         * immediately as not restriping is needed.
         * For larger arrays we record the new value - after validation
         * to be used by a reshape pass.
         */
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
+       int new_chunk = mddev->new_chunk_sectors;
  
-       if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+       if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
                return -EINVAL;
        if (new_chunk > 0) {
-               if (new_chunk & (new_chunk-1))
-                       /* not a power of 2 */
+               if (!is_power_of_2(new_chunk))
                        return -EINVAL;
-               if (new_chunk < PAGE_SIZE)
+               if (new_chunk < (PAGE_SIZE>>9))
                        return -EINVAL;
-               if (mddev->array_sectors & ((new_chunk>>9)-1))
+               if (mddev->array_sectors & (new_chunk-1))
                        /* not factor of array size */
                        return -EINVAL;
        }
        /* They look valid */
  
        if (mddev->raid_disks == 2) {
-               if (new_layout >= 0) {
-                       conf->algorithm = new_layout;
-                       mddev->layout = mddev->new_layout = new_layout;
+               /* can make the change immediately */
+               if (mddev->new_layout >= 0) {
+                       conf->algorithm = mddev->new_layout;
+                       mddev->layout = mddev->new_layout;
                }
                if (new_chunk > 0) {
-                       conf->chunk_size = new_chunk;
-                       mddev->chunk_size = mddev->new_chunk = new_chunk;
+                       conf->chunk_sectors = new_chunk ;
+                       mddev->chunk_sectors = new_chunk;
                }
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
-       } else {
-               if (new_layout >= 0)
-                       mddev->new_layout = new_layout;
-               if (new_chunk > 0)
-                       mddev->new_chunk = new_chunk;
        }
-       return 0;
+       return check_reshape(mddev);
  }
  
- static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+ static int raid6_check_reshape(mddev_t *mddev)
  {
-       if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+       int new_chunk = mddev->new_chunk_sectors;
+       if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
                return -EINVAL;
        if (new_chunk > 0) {
-               if (new_chunk & (new_chunk-1))
-                       /* not a power of 2 */
+               if (!is_power_of_2(new_chunk))
                        return -EINVAL;
-               if (new_chunk < PAGE_SIZE)
+               if (new_chunk < (PAGE_SIZE >> 9))
                        return -EINVAL;
-               if (mddev->array_sectors & ((new_chunk>>9)-1))
+               if (mddev->array_sectors & (new_chunk-1))
                        /* not factor of array size */
                        return -EINVAL;
        }
  
        /* They look valid */
-       if (new_layout >= 0)
-               mddev->new_layout = new_layout;
-       if (new_chunk > 0)
-               mddev->new_chunk = new_chunk;
-       return 0;
+       return check_reshape(mddev);
  }
  
  static void *raid5_takeover(mddev_t *mddev)
         *  raid1 - if there are two drives.  We need to know the chunk size
         *  raid4 - trivial - just use a raid4 layout.
         *  raid6 - Providing it is a *_6 layout
-        *
-        * For now, just do raid1
         */
  
        if (mddev->level == 1)
@@@ -5653,12 -5318,11 +5662,11 @@@ static struct mdk_personality raid6_per
        .sync_request   = sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
-       .check_reshape  = raid5_check_reshape,
+       .check_reshape  = raid6_check_reshape,
        .start_reshape  = raid5_start_reshape,
        .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid6_takeover,
-       .reconfig       = raid6_reconfig,
  };
  static struct mdk_personality raid5_personality =
  {
        .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid5_takeover,
-       .reconfig       = raid5_reconfig,
  };
  
  static struct mdk_personality raid4_personality =
diff --combined drivers/md/raid5.h
index 116d0b44b2a9ea9f7b302c38d28c05643006f249,9459689c4ea00a50073cdcfca38ee038bda10364..2390e0e83daf7c939344f8062a9d4a6ef737c476
@@@ -2,7 -2,6 +2,7 @@@
  #define _RAID5_H
  
  #include <linux/raid/xor.h>
 +#include <linux/dmaengine.h>
  
  /*
   *
   */
  enum check_states {
        check_state_idle = 0,
 -      check_state_run, /* parity check */
 +      check_state_run, /* xor parity check */
 +      check_state_run_q, /* q-parity check */
 +      check_state_run_pq, /* pq dual parity check */
        check_state_check_result,
        check_state_compute_run, /* parity repair */
        check_state_compute_result,
@@@ -218,8 -215,8 +218,8 @@@ struct stripe_head 
         * @target - STRIPE_OP_COMPUTE_BLK target
         */
        struct stripe_operations {
 -              int                target;
 -              u32                zero_sum_result;
 +              int                  target, target2;
 +              enum sum_check_flags zero_sum_result;
        } ops;
        struct r5dev {
                struct bio      req;
@@@ -301,7 -298,7 +301,7 @@@ struct r6_state 
  #define STRIPE_OP_COMPUTE_BLK 1
  #define STRIPE_OP_PREXOR      2
  #define STRIPE_OP_BIODRAIN    3
 -#define STRIPE_OP_POSTXOR     4
 +#define STRIPE_OP_RECONSTRUCT 4
  #define STRIPE_OP_CHECK       5
  
  /*
@@@ -337,7 -334,8 +337,8 @@@ struct raid5_private_data 
        struct hlist_head       *stripe_hashtbl;
        mddev_t                 *mddev;
        struct disk_info        *spare;
-       int                     chunk_size, level, algorithm;
+       int                     chunk_sectors;
+       int                     level, algorithm;
        int                     max_degraded;
        int                     raid_disks;
        int                     max_nr_stripes;
         */
        sector_t                reshape_safe;
        int                     previous_raid_disks;
-       int                     prev_chunk, prev_algo;
+       int                     prev_chunk_sectors;
+       int                     prev_algo;
        short                   generation; /* increments with every reshape */
        unsigned long           reshape_checkpoint; /* Time we last updated
                                                     * metadata */
                                            * (fresh device added).
                                            * Cleared when a sync completes.
                                            */
 -
 -      struct page             *spare_page; /* Used when checking P/Q in raid6 */
 +      /* per cpu variables */
 +      struct raid5_percpu {
 +              struct page     *spare_page; /* Used when checking P/Q in raid6 */
 +              void            *scribble;   /* space for constructing buffer
 +                                            * lists and performing address
 +                                            * conversions
 +                                            */
 +      } *percpu;
 +      size_t                  scribble_len; /* size of scribble region must be
 +                                             * associated with conf to handle
 +                                             * cpu hotplug while reshaping
 +                                             */
 +#ifdef CONFIG_HOTPLUG_CPU
 +      struct notifier_block   cpu_notify;
 +#endif
  
        /*
         * Free stripes pool
  
  typedef struct raid5_private_data raid5_conf_t;
  
- #define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private)
  /*
   * Our supported algorithms
   */
index 835b9c7bf1c26c61c908b2a00324d4ff27956808,f114bc7790bc46cfefbb8bd7417ebd35e7eea77f..2b9f2ac7ed60f0e8c61e79f2f7791545f877e1ef
@@@ -48,20 -48,19 +48,20 @@@ enum dma_status 
  
  /**
   * enum dma_transaction_type - DMA transaction types/indexes
 + *
 + * Note: The DMA_ASYNC_TX capability is not to be set by drivers.  It is
 + * automatically set as dma devices are registered.
   */
  enum dma_transaction_type {
        DMA_MEMCPY,
        DMA_XOR,
 -      DMA_PQ_XOR,
 -      DMA_DUAL_XOR,
 -      DMA_PQ_UPDATE,
 -      DMA_ZERO_SUM,
 -      DMA_PQ_ZERO_SUM,
 +      DMA_PQ,
 +      DMA_XOR_VAL,
 +      DMA_PQ_VAL,
        DMA_MEMSET,
 -      DMA_MEMCPY_CRC32C,
        DMA_INTERRUPT,
        DMA_PRIVATE,
 +      DMA_ASYNC_TX,
        DMA_SLAVE,
  };
  
  
  /**
   * enum dma_ctrl_flags - DMA flags to augment operation preparation,
 - *    control completion, and communicate status.
 + *  control completion, and communicate status.
   * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
 - *    this transaction
 + *  this transaction
   * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
 - *    acknowledges receipt, i.e. has has a chance to establish any
 - *    dependency chains
 + *  acknowledges receipt, i.e. has had a chance to establish any dependency
 + *  chains
   * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
   * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
   * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
   *    (if not set, do the source dma-unmapping as page)
   * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
   *    (if not set, do the destination dma-unmapping as page)
 + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
 + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
 + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
 + *  sources that were the result of a previous operation, in the case of a PQ
 + *  operation it continues the calculation with new sources
 + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
 + *  on the result of this operation
   */
  enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
        DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
        DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
 +      DMA_PREP_PQ_DISABLE_P = (1 << 6),
 +      DMA_PREP_PQ_DISABLE_Q = (1 << 7),
 +      DMA_PREP_CONTINUE = (1 << 8),
 +      DMA_PREP_FENCE = (1 << 9),
  };
  
 +/**
 + * enum sum_check_bits - bit positions used by enum sum_check_flags
 + */
 +enum sum_check_bits {
 +      SUM_CHECK_P = 0,
 +      SUM_CHECK_Q = 1,
 +};
 +
 +/**
 + * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
 + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
 + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
 + */
 +enum sum_check_flags {
 +      SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
 +      SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
 +};
 +
 +
  /**
   * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
   * See linux/cpumask.h
@@@ -211,8 -180,6 +211,6 @@@ typedef void (*dma_async_tx_callback)(v
   * @flags: flags to augment operation preparation, control completion, and
   *    communicate status
   * @phys: physical address of the descriptor
-  * @tx_list: driver common field for operations that require multiple
-  *    descriptors
   * @chan: target channel for this operation
   * @tx_submit: set the prepared descriptor(s) to be executed by the engine
   * @callback: routine to call after this operation is complete
@@@ -226,7 -193,6 +224,6 @@@ struct dma_async_tx_descriptor 
        dma_cookie_t cookie;
        enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
        dma_addr_t phys;
-       struct list_head tx_list;
        struct dma_chan *chan;
        dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
        dma_async_tx_callback callback;
   * @global_node: list_head for global dma_device_list
   * @cap_mask: one or more dma_capability flags
   * @max_xor: maximum number of xor sources, 0 if no capability
 + * @max_pq: maximum number of PQ sources and PQ-continue capability
 + * @copy_align: alignment shift for memcpy operations
 + * @xor_align: alignment shift for xor operations
 + * @pq_align: alignment shift for pq operations
 + * @fill_align: alignment shift for memset operations
   * @dev_id: unique device ID
   * @dev: struct device reference for dma mapping api
   * @device_alloc_chan_resources: allocate resources and return the
   * @device_free_chan_resources: release DMA channel's resources
   * @device_prep_dma_memcpy: prepares a memcpy operation
   * @device_prep_dma_xor: prepares a xor operation
 - * @device_prep_dma_zero_sum: prepares a zero_sum operation
 + * @device_prep_dma_xor_val: prepares a xor validation operation
 + * @device_prep_dma_pq: prepares a pq operation
 + * @device_prep_dma_pq_val: prepares a pq validation operation
   * @device_prep_dma_memset: prepares a memset operation
   * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
   * @device_prep_slave_sg: prepares a slave dma operation
@@@ -273,13 -232,7 +270,13 @@@ struct dma_device 
        struct list_head channels;
        struct list_head global_node;
        dma_cap_mask_t  cap_mask;
 -      int max_xor;
 +      unsigned short max_xor;
 +      unsigned short max_pq;
 +      u8 copy_align;
 +      u8 xor_align;
 +      u8 pq_align;
 +      u8 fill_align;
 +      #define DMA_HAS_PQ_CONTINUE (1 << 15)
  
        int dev_id;
        struct device *dev;
        struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
                struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
                unsigned int src_cnt, size_t len, unsigned long flags);
 -      struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
 +      struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
                struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
 -              size_t len, u32 *result, unsigned long flags);
 +              size_t len, enum sum_check_flags *result, unsigned long flags);
 +      struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
 +              struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 +              unsigned int src_cnt, const unsigned char *scf,
 +              size_t len, unsigned long flags);
 +      struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
 +              struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 +              unsigned int src_cnt, const unsigned char *scf, size_t len,
 +              enum sum_check_flags *pqres, unsigned long flags);
        struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
                struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
                unsigned long flags);
        void (*device_issue_pending)(struct dma_chan *chan);
  };
  
 +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
 +{
 +      size_t mask;
 +
 +      if (!align)
 +              return true;
 +      mask = (1 << align) - 1;
 +      if (mask & (off1 | off2 | len))
 +              return false;
 +      return true;
 +}
 +
 +/* is_dma_copy_aligned - check off1, off2 and len against dev->copy_align */
 +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
 +                                     size_t off2, size_t len)
 +{
 +      return dmaengine_check_align(dev->copy_align, off1, off2, len);
 +}
 +
 +/* is_dma_xor_aligned - check off1, off2 and len against dev->xor_align */
 +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
 +                                    size_t off2, size_t len)
 +{
 +      return dmaengine_check_align(dev->xor_align, off1, off2, len);
 +}
 +
 +/* is_dma_pq_aligned - check off1, off2 and len against dev->pq_align */
 +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
 +                                   size_t off2, size_t len)
 +{
 +      return dmaengine_check_align(dev->pq_align, off1, off2, len);
 +}
 +
 +/* is_dma_fill_aligned - check off1, off2 and len against dev->fill_align */
 +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
 +                                     size_t off2, size_t len)
 +{
 +      return dmaengine_check_align(dev->fill_align, off1, off2, len);
 +}
 +
 +/*
 + * dma_set_maxpq - initialize a device's max_pq field
 + * @dma: device to update
 + * @maxpq: value stored in dma->max_pq
 + * @has_pq_continue: when non-zero, DMA_HAS_PQ_CONTINUE is additionally
 + *  or'd into dma->max_pq
 + */
 +static inline void
 +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
 +{
 +      dma->max_pq = maxpq;
 +      if (has_pq_continue)
 +              dma->max_pq |= DMA_HAS_PQ_CONTINUE;
 +}
 +
 +/* dmaf_continue - true when DMA_PREP_CONTINUE is set in @flags */
 +static inline bool dmaf_continue(enum dma_ctrl_flags flags)
 +{
 +      return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
 +}
 +
 +/*
 + * dmaf_p_disabled_continue - true only when both DMA_PREP_CONTINUE and
 + * DMA_PREP_PQ_DISABLE_P are set in @flags
 + */
 +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
 +{
 +      enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
 +
 +      return (flags & mask) == mask;
 +}
 +
 +/* dma_dev_has_pq_continue - true when DMA_HAS_PQ_CONTINUE is set in dma->max_pq */
 +static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
 +{
 +      return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
 +}
 +
 +static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
 +{
 +      return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
 +}
 +
 +/* dma_maxpq - reduce maxpq in the face of continued operations
 + * @dma - dma device with PQ capability
 + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
 + *
 + * When an engine does not support native continuation we need 3 extra
 + * source slots to reuse P and Q with the following coefficients:
 + * 1/ {00} * P : remove P from Q', but use it as a source for P'
 + * 2/ {01} * Q : use Q to continue Q' calculation
 + * 3/ {00} * Q : subtract Q from P' to cancel (2)
 + *
 + * In the case where P is disabled we only need 1 extra source:
 + * 1/ {01} * Q : use Q to continue Q' calculation
 + *
 + * Returns dma_dev_to_maxpq(@dma) unchanged when the device has
 + * DMA_HAS_PQ_CONTINUE set or DMA_PREP_CONTINUE is not set in @flags,
 + * otherwise that value minus 1 (P disabled) or minus 3.
 + */
 +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
 +{
 +      if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
 +              return dma_dev_to_maxpq(dma);
 +      else if (dmaf_p_disabled_continue(flags))
 +              return dma_dev_to_maxpq(dma) - 1;
 +      else if (dmaf_continue(flags))
 +              return dma_dev_to_maxpq(dma) - 3;
 +      /* only reached if none of the branches above returned */
 +      BUG();
 +}
 +
  /* --- public DMA engine API --- */
  
  #ifdef CONFIG_DMA_ENGINE
@@@ -441,11 -296,7 +438,11 @@@ static inline void net_dmaengine_put(vo
  #ifdef CONFIG_ASYNC_TX_DMA
  #define async_dmaengine_get() dmaengine_get()
  #define async_dmaengine_put() dmaengine_put()
 +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
 +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
 +#else
  #define async_dma_find_channel(type) dma_find_channel(type)
 +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
  #else
  static inline void async_dmaengine_get(void)
  {
@@@ -458,7 -309,7 +455,7 @@@ async_dma_find_channel(enum dma_transac
  {
        return NULL;
  }
 -#endif
 +#endif /* CONFIG_ASYNC_TX_DMA */
  
  dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
        void *dest, void *src, size_t len);
diff --combined include/linux/pci_ids.h
index 2b4b8ce532564677f01c1705f2c54cab407ec53b,a3b0003657955da3651542059572387fda381308..bbeb13ceb8e8def5b9e46c09e3764dad1f7d8c18
  #define PCI_CLASS_SERIAL_USB_UHCI     0x0c0300
  #define PCI_CLASS_SERIAL_USB_OHCI     0x0c0310
  #define PCI_CLASS_SERIAL_USB_EHCI     0x0c0320
+ #define PCI_CLASS_SERIAL_USB_XHCI     0x0c0330
  #define PCI_CLASS_SERIAL_FIBER                0x0c04
  #define PCI_CLASS_SERIAL_SMBUS                0x0c05
  
  #define PCI_DEVICE_ID_PLX_PCI200SYN   0x3196
  #define PCI_DEVICE_ID_PLX_9030          0x9030
  #define PCI_DEVICE_ID_PLX_9050                0x9050
+ #define PCI_DEVICE_ID_PLX_9056                0x9056
  #define PCI_DEVICE_ID_PLX_9080                0x9080
  #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2        0xa001
  
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS       0x0034
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE 0x0035
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA        0x0036
- #define PCI_DEVICE_ID_NVIDIA_NVENET_10                0x0037
- #define PCI_DEVICE_ID_NVIDIA_NVENET_11                0x0038
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2       0x003e
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_ULTRA 0x0040
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800       0x0041
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE 0x0053
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA        0x0054
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2       0x0055
- #define PCI_DEVICE_ID_NVIDIA_NVENET_8         0x0056
- #define PCI_DEVICE_ID_NVIDIA_NVENET_9         0x0057
  #define PCI_DEVICE_ID_NVIDIA_CK804_AUDIO      0x0059
  #define PCI_DEVICE_ID_NVIDIA_CK804_PCIE               0x005d
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2_SMBUS    0x0064
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE      0x0065
- #define PCI_DEVICE_ID_NVIDIA_NVENET_2         0x0066
  #define PCI_DEVICE_ID_NVIDIA_MCP2_MODEM               0x0069
  #define PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO               0x006a
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SMBUS   0x0084
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE     0x0085
- #define PCI_DEVICE_ID_NVIDIA_NVENET_4         0x0086
  #define PCI_DEVICE_ID_NVIDIA_MCP2S_MODEM      0x0089
  #define PCI_DEVICE_ID_NVIDIA_CK8_AUDIO                0x008a
- #define PCI_DEVICE_ID_NVIDIA_NVENET_5         0x008c
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA    0x008e
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GT   0x0090
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GTX 0x0091
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3          0x00d1
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS    0x00d4
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE      0x00d5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_3         0x00d6
  #define PCI_DEVICE_ID_NVIDIA_MCP3_MODEM               0x00d9
  #define PCI_DEVICE_ID_NVIDIA_MCP3_AUDIO               0x00da
- #define PCI_DEVICE_ID_NVIDIA_NVENET_7         0x00df
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3S         0x00e1
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA    0x00e3
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS   0x00e4
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE     0x00e5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_6         0x00e6
  #define PCI_DEVICE_ID_NVIDIA_CK8S_AUDIO               0x00ea
  #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2   0x00ee
  #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1 0x00f0
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_SMBUS     0x01b4
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE               0x01bc
  #define PCI_DEVICE_ID_NVIDIA_MCP1_MODEM               0x01c1
- #define PCI_DEVICE_ID_NVIDIA_NVENET_1         0x01c3
  #define PCI_DEVICE_ID_NVIDIA_NFORCE2          0x01e0
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE3         0x0200
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1               0x0201
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA        0x037E
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2       0x037F
- #define PCI_DEVICE_ID_NVIDIA_NVENET_12                0x0268
- #define PCI_DEVICE_ID_NVIDIA_NVENET_13                0x0269
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800 0x0280
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X    0x0281
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE     0x0282
  #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2    0x0348
  #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000       0x034C
  #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100         0x034E
- #define PCI_DEVICE_ID_NVIDIA_NVENET_14              0x0372
  #define PCI_DEVICE_ID_NVIDIA_NVENET_15              0x0373
- #define PCI_DEVICE_ID_NVIDIA_NVENET_16              0x03E5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_17              0x03E6
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA      0x03E7
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SMBUS           0x03EB
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE       0x03EC
- #define PCI_DEVICE_ID_NVIDIA_NVENET_18              0x03EE
- #define PCI_DEVICE_ID_NVIDIA_NVENET_19              0x03EF
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2     0x03F6
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3     0x03F7
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_SMBUS           0x0446
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE     0x0448
- #define PCI_DEVICE_ID_NVIDIA_NVENET_20              0x0450
- #define PCI_DEVICE_ID_NVIDIA_NVENET_21              0x0451
- #define PCI_DEVICE_ID_NVIDIA_NVENET_22              0x0452
- #define PCI_DEVICE_ID_NVIDIA_NVENET_23              0x0453
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_SMBUS     0x0542
- #define PCI_DEVICE_ID_NVIDIA_NVENET_24              0x054C
- #define PCI_DEVICE_ID_NVIDIA_NVENET_25              0x054D
- #define PCI_DEVICE_ID_NVIDIA_NVENET_26              0x054E
- #define PCI_DEVICE_ID_NVIDIA_NVENET_27              0x054F
- #define PCI_DEVICE_ID_NVIDIA_NVENET_28              0x07DC
- #define PCI_DEVICE_ID_NVIDIA_NVENET_29              0x07DD
- #define PCI_DEVICE_ID_NVIDIA_NVENET_30              0x07DE
- #define PCI_DEVICE_ID_NVIDIA_NVENET_31              0x07DF
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE       0x0560
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE       0x056C
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS    0x0752
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE       0x0759
- #define PCI_DEVICE_ID_NVIDIA_NVENET_32              0x0760
- #define PCI_DEVICE_ID_NVIDIA_NVENET_33              0x0761
- #define PCI_DEVICE_ID_NVIDIA_NVENET_34              0x0762
- #define PCI_DEVICE_ID_NVIDIA_NVENET_35              0x0763
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS     0x07D8
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS     0x0AA2
- #define PCI_DEVICE_ID_NVIDIA_NVENET_36              0x0AB0
- #define PCI_DEVICE_ID_NVIDIA_NVENET_37              0x0AB1
- #define PCI_DEVICE_ID_NVIDIA_NVENET_38              0x0AB2
- #define PCI_DEVICE_ID_NVIDIA_NVENET_39              0x0AB3
  
  #define PCI_VENDOR_ID_IMS             0x10e0
  #define PCI_DEVICE_ID_IMS_TT128               0x9128
  
  #define PCI_VENDOR_ID_CREATIVE                0x1102 /* duplicate: ECTIVA */
  #define PCI_DEVICE_ID_CREATIVE_EMU10K1        0x0002
+ #define PCI_DEVICE_ID_CREATIVE_20K1   0x0005
+ #define PCI_DEVICE_ID_CREATIVE_20K2   0x000b
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB0760      0x0024
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08801     0x0041
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08802     0x0042
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08803     0x0043
+ #define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX     0x6000
  
  #define PCI_VENDOR_ID_ECTIVA          0x1102 /* duplicate: CREATIVE */
  #define PCI_DEVICE_ID_ECTIVA_EV1938   0x8938
  #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO      0x0107
  #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2     0x0108
  
+ #define PCI_VENDOR_ID_DIGIGRAM                0x1369
+ #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM   0xc001
+ #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM       0xc002
  #define PCI_VENDOR_ID_KAWASAKI                0x136b
  #define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01
  
  #define PCI_SUBDEVICE_ID_CCD_SWYX4S   0xB540
  #define PCI_SUBDEVICE_ID_CCD_JH4S20   0xB550
  #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1 0xB552
+ #define PCI_SUBDEVICE_ID_CCD_JHSE1    0xB553
+ #define PCI_SUBDEVICE_ID_CCD_JH8S     0xB55B
  #define PCI_SUBDEVICE_ID_CCD_BN4S     0xB560
  #define PCI_SUBDEVICE_ID_CCD_BN8S     0xB562
  #define PCI_SUBDEVICE_ID_CCD_BNE1     0xB563
  #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U      0xC118
  #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU     0xC11C
  #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
+ #define PCI_DEVICE_ID_OXSEMI_C950     0x950B
  #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
  #define PCI_DEVICE_ID_OXSEMI_16PCI954PP       0x9513
  #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
  #define PCI_DEVICE_ID_OXSEMI_16PCI952PP       0x9523
+ #define PCI_SUBDEVICE_ID_OXSEMI_C950  0x0001
  
  #define PCI_VENDOR_ID_CHELSIO         0x1425
  
  #define PCI_VENDOR_ID_MAINPINE                0x1522
  #define PCI_DEVICE_ID_MAINPINE_PBRIDGE        0x0100
  #define PCI_VENDOR_ID_ENE             0x1524
+ #define PCI_DEVICE_ID_ENE_CB710_FLASH 0x0510
  #define PCI_DEVICE_ID_ENE_CB712_SD    0x0550
  #define PCI_DEVICE_ID_ENE_CB712_SD_2  0x0551
  #define PCI_DEVICE_ID_ENE_CB714_SD    0x0750
  #define PCI_DEVICE_ID_MPC8547E                0x0018
  #define PCI_DEVICE_ID_MPC8545E                0x0019
  #define PCI_DEVICE_ID_MPC8545         0x001a
+ #define PCI_DEVICE_ID_MPC8569E                0x0061
+ #define PCI_DEVICE_ID_MPC8569         0x0060
  #define PCI_DEVICE_ID_MPC8568E                0x0020
  #define PCI_DEVICE_ID_MPC8568         0x0021
  #define PCI_DEVICE_ID_MPC8567E                0x0022
  #define PCI_DEVICE_ID_MPC8572         0x0041
  #define PCI_DEVICE_ID_MPC8536E                0x0050
  #define PCI_DEVICE_ID_MPC8536         0x0051
+ #define PCI_DEVICE_ID_P2020E          0x0070
+ #define PCI_DEVICE_ID_P2020           0x0071
  #define PCI_DEVICE_ID_MPC8641         0x7010
  #define PCI_DEVICE_ID_MPC8641D                0x7011
  #define PCI_DEVICE_ID_MPC8610         0x7018
  
  #define PCI_VENDOR_ID_QMI             0x1a32
  
+ #define PCI_VENDOR_ID_AZWAVE          0x1a3b
  #define PCI_VENDOR_ID_TEKRAM          0x1de1
  #define PCI_DEVICE_ID_TEKRAM_DC290    0xdc29
  
  #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
  #define PCI_DEVICE_ID_INTEL_IOAT_CNB  0x360b
  #define PCI_DEVICE_ID_INTEL_FBD_CNB   0x360c
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
  #define PCI_DEVICE_ID_INTEL_ICH10_0   0x3a14
  #define PCI_DEVICE_ID_INTEL_ICH10_1   0x3a16
  #define PCI_DEVICE_ID_INTEL_ICH10_2   0x3a18