Merge branch 'dmaengine' into async-tx-next

author Dan Williams <dan.j.williams@intel.com>

Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)

committer Dan Williams <dan.j.williams@intel.com>

Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
author Dan Williams <dan.j.williams@intel.com>
Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
diff --combined arch/arm/include/asm/hardware/iop_adma.h

index bbe8a0475cadb24410a73ae62b8f84cf71154cc8,95dc133d0a7f91d1fbc08ba98d6bdd75c2f2bd0d..59b8c3892f76731608b346d0d100910e9047c481
--- 1/arch/arm/include/asm/hardware/iop_adma.h
--- 2/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@@ -86,6 -86,7 +86,7 @@@ struct iop_adma_chan 
    * @idx: pool index
    * @unmap_src_cnt: number of xor sources
    * @unmap_len: transaction bytecount
+  * @tx_list: list of descriptors that are associated with one operation
    * @async_tx: support for the async_tx api
    * @group_list: list of slots that make up a multi-descriptor transaction
    *    for example transfer lengths larger than the supported hw max
@@@ -102,11 -103,11 +103,12 @@@ struct iop_adma_desc_slot 
         u16 idx;
         u16 unmap_src_cnt;
         size_t unmap_len;
+       struct list_head tx_list;
         struct dma_async_tx_descriptor async_tx;
         union {
                 u32 *xor_check_result;
                 u32 *crc32_result;
+ +              u32 *pq_check_result;
         };
   };
   
diff --combined drivers/dma/Kconfig

index ddcd9793b25c2e4c2d612e0c27cc52ab596ba6b3,81e1020fb5148a75677aec191e974c554031d03f..fe1f3717b1ffcacffb12af5a9a75aa53df446fd2
--- 1/drivers/dma/Kconfig
--- 2/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@@ -17,15 -17,11 +17,15 @@@ if DMADEVICE
   
   comment "DMA Devices"
   
+ +config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ +      bool
+ +
   config INTEL_IOATDMA
         tristate "Intel I/OAT DMA support"
         depends on PCI && X86
         select DMA_ENGINE
         select DCA
+ +      select ASYNC_TX_DISABLE_CHANNEL_SWITCH
         help
           Enable support for the Intel(R) I/OAT DMA engine present
           in recent Intel Xeon chipsets.
@@@ -50,6 -46,14 +50,14 @@@ config DW_DMA
           Support the Synopsys DesignWare AHB DMA controller.  This
           can be integrated in chips such as the Atmel AT32ap7000.
   
+ config AT_HDMAC
+       tristate "Atmel AHB DMA support"
+       depends on ARCH_AT91SAM9RL
+       select DMA_ENGINE
+       help
+         Support the Atmel AHB DMA controller.  This can be integrated in
+         chips such as the Atmel AT91SAM9RL.
+ 
   config FSL_DMA
         tristate "Freescale Elo and Elo Plus DMA support"
         depends on FSL_SOC
@@@ -85,6 -89,14 +93,14 @@@ config MX3_IPU_IRQ
           To avoid bloating the irq_desc[] array we allocate a sufficient
           number of IRQ slots and map them dynamically to specific sources.
   
+ config TXX9_DMAC
+       tristate "Toshiba TXx9 SoC DMA support"
+       depends on MACH_TX49XX || MACH_TX39XX
+       select DMA_ENGINE
+       help
+         Support the TXx9 SoC internal DMA controller.  This can be
+         integrated in chips such as the Toshiba TX4927/38/39.
+ 
   config DMA_ENGINE
         bool
   
@@@ -104,7 -116,7 +120,7 @@@ config NET_DM
   
   config ASYNC_TX_DMA
         bool "Async_tx: Offload support for the async_tx api"
-       depends on DMA_ENGINE
+       depends on DMA_ENGINE && !HIGHMEM64G
         help
           This allows the async_tx api to take advantage of offload engines for
           memcpy, memset, xor, and raid6 p+q operations.  If your platform has
diff --combined drivers/dma/dmaengine.c

index d5bc628d207cf29894a5d685e1d3df284b9788f6,562d182eae660178bf6155998d61b4c78b3c32b5..bd0b248de2cfabc28f1fecd63e5d64ed61236844
--- 1/drivers/dma/dmaengine.c
--- 2/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@@ -608,40 -608,6 +608,40 @@@ void dmaengine_put(void
   }
   EXPORT_SYMBOL(dmaengine_put);
   
+ +static bool device_has_all_tx_types(struct dma_device *device)
+ +{
+ +      /* A device that satisfies this test has channels that will never cause
+ +       * an async_tx channel switch event as all possible operation types can
+ +       * be handled.
+ +       */
+ +      #ifdef CONFIG_ASYNC_TX_DMA
+ +      if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ +              return false;
+ +      #endif
+ +
+ +      #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+ +      if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+ +              return false;
+ +      #endif
+ +
+ +      #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+ +      if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+ +              return false;
+ +      #endif
+ +
+ +      #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+ +      if (!dma_has_cap(DMA_XOR, device->cap_mask))
+ +              return false;
+ +      #endif
+ +
+ +      #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+ +      if (!dma_has_cap(DMA_PQ, device->cap_mask))
+ +              return false;
+ +      #endif
+ +
+ +      return true;
+ +}
+ +
   static int get_dma_id(struct dma_device *device)
   {
         int rc;
@@@ -678,12 -644,8 +678,12 @@@ int dma_async_device_register(struct dm
                 !device->device_prep_dma_memcpy);
         BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
                 !device->device_prep_dma_xor);
- -      BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
- -              !device->device_prep_dma_zero_sum);
+ +      BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+ +              !device->device_prep_dma_xor_val);
+ +      BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+ +              !device->device_prep_dma_pq);
+ +      BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+ +              !device->device_prep_dma_pq_val);
         BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
                 !device->device_prep_dma_memset);
         BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@@ -699,12 -661,6 +699,12 @@@
         BUG_ON(!device->device_issue_pending);
         BUG_ON(!device->dev);
   
+ +      /* note: this only matters in the
+ +       * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+ +       */
+ +      if (device_has_all_tx_types(device))
+ +              dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+ +
         idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
         if (!idr_ref)
                 return -ENOMEM;
@@@ -977,30 -933,54 +977,29 @@@ void dma_async_tx_descriptor_init(struc
   {
         tx->chan = chan;
         spin_lock_init(&tx->lock);
-       INIT_LIST_HEAD(&tx->tx_list);
   }
   EXPORT_SYMBOL(dma_async_tx_descriptor_init);
   
   /* dma_wait_for_async_tx - spin wait for a transaction to complete
    * @tx: in-flight transaction to wait on
- - *
- - * This routine assumes that tx was obtained from a call to async_memcpy,
- - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- - * and submitted).  Walking the parent chain is only meant to cover for DMA
- - * drivers that do not implement the DMA_INTERRUPT capability and may race with
- - * the driver's descriptor cleanup routine.
    */
   enum dma_status
   dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
   {
- -      enum dma_status status;
- -      struct dma_async_tx_descriptor *iter;
- -      struct dma_async_tx_descriptor *parent;
+ +      unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
   
         if (!tx)
                 return DMA_SUCCESS;
   
- -      WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
- -                " %s\n", __func__, dma_chan_name(tx->chan));
- -
- -      /* poll through the dependency chain, return when tx is complete */
- -      do {
- -              iter = tx;
- -
- -              /* find the root of the unsubmitted dependency chain */
- -              do {
- -                      parent = iter->parent;
- -                      if (!parent)
- -                              break;
- -                      else
- -                              iter = parent;
- -              } while (parent);
- -
- -              /* there is a small window for ->parent == NULL and
- -               * ->cookie == -EBUSY
- -               */
- -              while (iter->cookie == -EBUSY)
- -                      cpu_relax();
- -
- -              status = dma_sync_wait(iter->chan, iter->cookie);
- -      } while (status == DMA_IN_PROGRESS || (iter != tx));
- -
- -      return status;
+ +      while (tx->cookie == -EBUSY) {
+ +              if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ +                      pr_err("%s timeout waiting for descriptor submission\n",
+ +                              __func__);
+ +                      return DMA_ERROR;
+ +              }
+ +              cpu_relax();
+ +      }
+ +      return dma_sync_wait(tx->chan, tx->cookie);
   }
   EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
   
diff --combined drivers/dma/dmatest.c

index a3722a7384b5048a938d52215498218fffaf4bb0,d93017fc7872d1f0fe3904ff85870ff8aee1156a..a32a4cf7b1e049ab537db5c065f4f6a474dca2d2
--- 1/drivers/dma/dmatest.c
--- 2/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@@ -38,16 -38,16 +38,21 @@@ module_param(max_channels, uint, S_IRUG
   MODULE_PARM_DESC(max_channels,
                 "Maximum number of channels to use (default: all)");
   
+ static unsigned int iterations;
+ module_param(iterations, uint, S_IRUGO);
+ MODULE_PARM_DESC(iterations,
+               "Iterations before stopping test (default: infinite)");
+ 
   static unsigned int xor_sources = 3;
   module_param(xor_sources, uint, S_IRUGO);
   MODULE_PARM_DESC(xor_sources,
                 "Number of xor source buffers (default: 3)");
   
+ +static unsigned int pq_sources = 3;
+ +module_param(pq_sources, uint, S_IRUGO);
+ +MODULE_PARM_DESC(pq_sources,
+ +              "Number of p+q source buffers (default: 3)");
+ +
   /*
    * Initialization patterns. All bytes in the source buffer has bit 7
    * set, all bytes in the destination buffer has bit 7 cleared.
@@@ -119,7 -119,7 +124,7 @@@ static void dmatest_init_srcs(u8 **bufs
                         buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
                 for ( ; i < start + len; i++)
                         buf[i] = PATTERN_SRC | PATTERN_COPY
-                               | (~i & PATTERN_COUNT_MASK);;
+                               | (~i & PATTERN_COUNT_MASK);
                 for ( ; i < test_buf_size; i++)
                         buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
                 buf++;
@@@ -232,7 -232,6 +237,7 @@@ static int dmatest_func(void *data
         dma_cookie_t            cookie;
         enum dma_status         status;
         enum dma_ctrl_flags     flags;
+ +      u8                      pq_coefs[pq_sources];
         int                     ret;
         int                     src_cnt;
         int                     dst_cnt;
@@@ -249,11 -248,6 +254,11 @@@
         else if (thread->type == DMA_XOR) {
                 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
                 dst_cnt = 1;
+ +      } else if (thread->type == DMA_PQ) {
+ +              src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+ +              dst_cnt = 2;
+ +              for (i = 0; i < pq_sources; i++)
+ +                      pq_coefs[i] = 1;
         } else
                 goto err_srcs;
   
@@@ -281,14 -275,14 +286,15 @@@
   
         flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
   
-       while (!kthread_should_stop()) {
+       while (!kthread_should_stop()
+              && !(iterations && total_tests >= iterations)) {
                 struct dma_device *dev = chan->device;
                 struct dma_async_tx_descriptor *tx = NULL;
                 dma_addr_t dma_srcs[src_cnt];
                 dma_addr_t dma_dsts[dst_cnt];
                 struct completion cmp;
                 unsigned long tmo = msecs_to_jiffies(3000);
+ +              u8 align = 0;
   
                 total_tests++;
   
@@@ -296,18 -290,6 +302,18 @@@
                 src_off = dmatest_random() % (test_buf_size - len + 1);
                 dst_off = dmatest_random() % (test_buf_size - len + 1);
   
+ +              /* honor alignment restrictions */
+ +              if (thread->type == DMA_MEMCPY)
+ +                      align = dev->copy_align;
+ +              else if (thread->type == DMA_XOR)
+ +                      align = dev->xor_align;
+ +              else if (thread->type == DMA_PQ)
+ +                      align = dev->pq_align;
+ +
+ +              len = (len >> align) << align;
+ +              src_off = (src_off >> align) << align;
+ +              dst_off = (dst_off >> align) << align;
+ +
                 dmatest_init_srcs(thread->srcs, src_off, len);
                 dmatest_init_dsts(thread->dsts, dst_off, len);
   
@@@ -324,7 -306,6 +330,7 @@@
                                                      DMA_BIDIRECTIONAL);
                 }
   
+ +
                 if (thread->type == DMA_MEMCPY)
                         tx = dev->device_prep_dma_memcpy(chan,
                                                          dma_dsts[0] + dst_off,
@@@ -335,15 -316,6 +341,15 @@@
                                                       dma_dsts[0] + dst_off,
                                                       dma_srcs, xor_sources,
                                                       len, flags);
+ +              else if (thread->type == DMA_PQ) {
+ +                      dma_addr_t dma_pq[dst_cnt];
+ +
+ +                      for (i = 0; i < dst_cnt; i++)
+ +                              dma_pq[i] = dma_dsts[i] + dst_off;
+ +                      tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+ +                                                   pq_sources, pq_coefs,
+ +                                                   len, flags);
+ +              }
   
                 if (!tx) {
                         for (i = 0; i < src_cnt; i++)
@@@ -450,6 -422,13 +456,13 @@@ err_srcbuf
   err_srcs:
         pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
                         thread_name, total_tests, failed_tests, ret);
+ 
+       if (iterations > 0)
+               while (!kthread_should_stop()) {
+                       DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit);
+                       interruptible_sleep_on(&wait_dmatest_exit);
+               }
+ 
         return ret;
   }
   
@@@ -480,8 -459,6 +493,8 @@@ static int dmatest_add_threads(struct d
                 op = "copy";
         else if (type == DMA_XOR)
                 op = "xor";
+ +      else if (type == DMA_PQ)
+ +              op = "pq";
         else
                 return -EINVAL;
   
@@@ -531,16 -508,12 +544,16 @@@ static int dmatest_add_channel(struct d
   
         if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
                 cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
-               thread_count += cnt > 0 ?: 0;
+               thread_count += cnt > 0 ? cnt : 0;
         }
         if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
                 cnt = dmatest_add_threads(dtc, DMA_XOR);
-               thread_count += cnt > 0 ?: 0;
+               thread_count += cnt > 0 ? cnt : 0;
         }
+ +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ +              cnt = dmatest_add_threads(dtc, DMA_PQ);
+ +              thread_count += cnt > 0 ? cnt : 0;
+ +      }
   
         pr_info("dmatest: Started %u threads using %s\n",
                 thread_count, dma_chan_name(chan));
diff --combined drivers/dma/ioat/dma.c

index 32a757be75c1896e47e6c913641b1a344ba46872,21527b89590cce79295e5aa3f6eccc9c735522d9..c524d36d3c2e199db83e5b2a9a8a7b7bf1939a21
--- 1/drivers/dma/ioat/dma.c
--- 2/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@@ -251,19 -251,18 +251,19 @@@ static dma_cookie_t ioat1_tx_submit(str
         dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
   
         /* write address into NextDescriptor field of last desc in chain */
-       first = to_ioat_desc(tx->tx_list.next);
+       first = to_ioat_desc(desc->tx_list.next);
         chain_tail = to_ioat_desc(ioat->used_desc.prev);
         /* make descriptor updates globally visible before chaining */
         wmb();
         chain_tail->hw->next = first->txd.phys;
-       list_splice_tail_init(&tx->tx_list, &ioat->used_desc);
+       list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
         dump_desc_dbg(ioat, chain_tail);
         dump_desc_dbg(ioat, first);
   
         if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
                 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
   
+ +      ioat->active += desc->hw->tx_cnt;
         ioat->pending += desc->hw->tx_cnt;
         if (ioat->pending >= ioat_pending_level)
                 __ioat1_dma_memcpy_issue_pending(ioat);
@@@ -298,6 -297,7 +298,7 @@@ ioat_dma_alloc_descriptor(struct ioat_d
   
         memset(desc, 0, sizeof(*desc));
   
+       INIT_LIST_HEAD(&desc_sw->tx_list);
         dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
         desc_sw->txd.tx_submit = ioat1_tx_submit;
         desc_sw->hw = desc;
@@@ -522,7 -522,7 +523,7 @@@ ioat1_dma_prep_memcpy(struct dma_chan *
   
         desc->txd.flags = flags;
         desc->len = total_len;
-       list_splice(&chain, &desc->txd.tx_list);
+       list_splice(&chain, &desc->tx_list);
         hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
         hw->ctl_f.compl_write = 1;
         hw->tx_cnt = tx_cnt;
@@@ -539,6 -539,17 +540,6 @@@ static void ioat1_cleanup_tasklet(unsig
         writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
   }
   
- -static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
- -                     int direction, enum dma_ctrl_flags flags, bool dst)
- -{
- -      if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
- -          (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
- -              pci_unmap_single(pdev, addr, len, direction);
- -      else
- -              pci_unmap_page(pdev, addr, len, direction);
- -}
- -
- -
   void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
                     size_t len, struct ioat_dma_descriptor *hw)
   {
@@@ -612,7 -623,6 +613,7 @@@ static void __cleanup(struct ioat_dma_c
                         chan->completed_cookie = tx->cookie;
                         tx->cookie = 0;
                         ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ +                      ioat->active -= desc->hw->tx_cnt;
                         if (tx->callback) {
                                 tx->callback(tx->callback_param);
                                 tx->callback = NULL;
@@@ -799,7 -809,7 +800,7 @@@ static void __devinit ioat_dma_test_cal
    * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
    * @device: device to be tested
    */
- -static int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+ +int __devinit ioat_dma_self_test(struct ioatdma_device *device)
   {
         int i;
         u8 *src;
@@@ -1030,8 -1040,13 +1031,8 @@@ int __devinit ioat_probe(struct ioatdma
         dma_cap_set(DMA_MEMCPY, dma->cap_mask);
         dma->dev = &pdev->dev;
   
- -      dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
- -              " %d channels, device version 0x%02x, driver version %s\n",
- -              dma->chancnt, device->version, IOAT_DMA_VERSION);
- -
         if (!dma->chancnt) {
- -              dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
- -                      "zero channels detected\n");
+ +              dev_err(dev, "zero channels detected\n");
                 goto err_setup_interrupts;
         }
   
@@@ -1039,7 -1054,7 +1040,7 @@@
         if (err)
                 goto err_setup_interrupts;
   
- -      err = ioat_dma_self_test(device);
+ +      err = device->self_test(device);
         if (err)
                 goto err_self_test;
   
@@@ -1082,113 -1097,6 +1083,113 @@@ static void ioat1_intr_quirk(struct ioa
         pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
   }
   
+ +static ssize_t ring_size_show(struct dma_chan *c, char *page)
+ +{
+ +      struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ +
+ +      return sprintf(page, "%d\n", ioat->desccount);
+ +}
+ +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+ +
+ +static ssize_t ring_active_show(struct dma_chan *c, char *page)
+ +{
+ +      struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ +
+ +      return sprintf(page, "%d\n", ioat->active);
+ +}
+ +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+ +
+ +static ssize_t cap_show(struct dma_chan *c, char *page)
+ +{
+ +      struct dma_device *dma = c->device;
+ +
+ +      return sprintf(page, "copy%s%s%s%s%s%s\n",
+ +                     dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+ +                     dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+ +                     dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+ +                     dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+ +                     dma_has_cap(DMA_MEMSET, dma->cap_mask)  ? " fill" : "",
+ +                     dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+ +
+ +}
+ +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+ +
+ +static ssize_t version_show(struct dma_chan *c, char *page)
+ +{
+ +      struct dma_device *dma = c->device;
+ +      struct ioatdma_device *device = to_ioatdma_device(dma);
+ +
+ +      return sprintf(page, "%d.%d\n",
+ +                     device->version >> 4, device->version & 0xf);
+ +}
+ +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+ +
+ +static struct attribute *ioat1_attrs[] = {
+ +      &ring_size_attr.attr,
+ +      &ring_active_attr.attr,
+ +      &ioat_cap_attr.attr,
+ +      &ioat_version_attr.attr,
+ +      NULL,
+ +};
+ +
+ +static ssize_t
+ +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+ +{
+ +      struct ioat_sysfs_entry *entry;
+ +      struct ioat_chan_common *chan;
+ +
+ +      entry = container_of(attr, struct ioat_sysfs_entry, attr);
+ +      chan = container_of(kobj, struct ioat_chan_common, kobj);
+ +
+ +      if (!entry->show)
+ +              return -EIO;
+ +      return entry->show(&chan->common, page);
+ +}
+ +
+ +struct sysfs_ops ioat_sysfs_ops = {
+ +      .show   = ioat_attr_show,
+ +};
+ +
+ +static struct kobj_type ioat1_ktype = {
+ +      .sysfs_ops = &ioat_sysfs_ops,
+ +      .default_attrs = ioat1_attrs,
+ +};
+ +
+ +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+ +{
+ +      struct dma_device *dma = &device->common;
+ +      struct dma_chan *c;
+ +
+ +      list_for_each_entry(c, &dma->channels, device_node) {
+ +              struct ioat_chan_common *chan = to_chan_common(c);
+ +              struct kobject *parent = &c->dev->device.kobj;
+ +              int err;
+ +
+ +              err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+ +              if (err) {
+ +                      dev_warn(to_dev(chan),
+ +                               "sysfs init error (%d), continuing...\n", err);
+ +                      kobject_put(&chan->kobj);
+ +                      set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+ +              }
+ +      }
+ +}
+ +
+ +void ioat_kobject_del(struct ioatdma_device *device)
+ +{
+ +      struct dma_device *dma = &device->common;
+ +      struct dma_chan *c;
+ +
+ +      list_for_each_entry(c, &dma->channels, device_node) {
+ +              struct ioat_chan_common *chan = to_chan_common(c);
+ +
+ +              if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+ +                      kobject_del(&chan->kobj);
+ +                      kobject_put(&chan->kobj);
+ +              }
+ +      }
+ +}
+ +
   int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
   {
         struct pci_dev *pdev = device->pdev;
@@@ -1197,7 -1105,6 +1198,7 @@@
   
         device->intr_quirk = ioat1_intr_quirk;
         device->enumerate_channels = ioat1_enumerate_channels;
+ +      device->self_test = ioat_dma_self_test;
         dma = &device->common;
         dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
         dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
@@@ -1212,8 -1119,6 +1213,8 @@@
         err = ioat_register(device);
         if (err)
                 return err;
+ +      ioat_kobject_add(device, &ioat1_ktype);
+ +
         if (dca)
                 device->dca = ioat_dca_init(pdev, device->reg_base);
   
@@@ -1226,8 -1131,6 +1227,8 @@@ void __devexit ioat_dma_remove(struct i
   
         ioat_disable_interrupts(device);
   
+ +      ioat_kobject_del(device);
+ +
         dma_async_device_unregister(dma);
   
         pci_pool_destroy(device->dma_pool);
diff --combined drivers/dma/ioat/dma.h

index 0e37e426c729c86157424ca28b544ae1f250aa93,8966fa5453a797770a5f2d0f89165055efb11043..6a675a2a2d1cffbe05938d3981f4945a21df1997
--- 1/drivers/dma/ioat/dma.h
--- 2/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@@ -60,12 -60,8 +60,12 @@@
    * @dca: direct cache access context
    * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
    * @enumerate_channels: hw version specific channel enumeration
+ + * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ + * @timer_fn: select between the v2 and v3 timer watchdog routines
+ + * @self_test: hardware version specific self test for each supported op type
+ + *
+ + * Note: the v3 cleanup routine supports raid operations
    */
- -
   struct ioatdma_device {
         struct pci_dev *pdev;
         void __iomem *reg_base;
@@@ -78,9 -74,6 +78,9 @@@
         struct dca_provider *dca;
         void (*intr_quirk)(struct ioatdma_device *device);
         int (*enumerate_channels)(struct ioatdma_device *device);
+ +      void (*cleanup_tasklet)(unsigned long data);
+ +      void (*timer_fn)(unsigned long data);
+ +      int (*self_test)(struct ioatdma_device *device);
   };
   
   struct ioat_chan_common {
@@@ -93,7 -86,6 +93,7 @@@
         #define IOAT_COMPLETION_PENDING 0
         #define IOAT_COMPLETION_ACK 1
         #define IOAT_RESET_PENDING 2
+ +      #define IOAT_KOBJ_INIT_FAIL 3
         struct timer_list timer;
         #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
         #define IDLE_TIMEOUT msecs_to_jiffies(2000)
@@@ -102,13 -94,8 +102,13 @@@
         dma_addr_t completion_dma;
         u64 *completion;
         struct tasklet_struct cleanup_task;
+ +      struct kobject kobj;
   };
   
+ +struct ioat_sysfs_entry {
+ +      struct attribute attr;
+ +      ssize_t (*show)(struct dma_chan *, char *);
+ +};
   
   /**
    * struct ioat_dma_chan - internal representation of a DMA channel
@@@ -124,7 -111,6 +124,7 @@@ struct ioat_dma_chan 
   
         int pending;
         u16 desccount;
+ +      u16 active;
   };
   
   static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
@@@ -169,9 -155,9 +169,9 @@@ ioat_is_complete(struct dma_chan *c, dm
   
   /**
    * struct ioat_desc_sw - wrapper around hardware descriptor
- - * @hw: hardware DMA descriptor
+ + * @hw: hardware DMA descriptor (for memcpy)
    * @node: this descriptor will either be on the free list,
-  *     or attached to a transaction list (async_tx.tx_list)
+  *     or attached to a transaction list (tx_list)
    * @txd: the generic software descriptor for all engines
    * @id: identifier for debug
    */
@@@ -179,6 -165,7 +179,7 @@@ struct ioat_desc_sw 
         struct ioat_dma_descriptor *hw;
         struct list_head node;
         size_t len;
+       struct list_head tx_list;
         struct dma_async_tx_descriptor txd;
         #ifdef DEBUG
         int id;
@@@ -301,20 -288,9 +302,20 @@@ static inline bool is_ioat_bug(unsigne
                          IOAT_CHANERR_LENGTH_ERR));
   }
   
+ +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+ +                            int direction, enum dma_ctrl_flags flags, bool dst)
+ +{
+ +      if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+ +          (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+ +              pci_unmap_single(pdev, addr, len, direction);
+ +      else
+ +              pci_unmap_page(pdev, addr, len, direction);
+ +}
+ +
   int __devinit ioat_probe(struct ioatdma_device *device);
   int __devinit ioat_register(struct ioatdma_device *device);
   int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+ +int __devinit ioat_dma_self_test(struct ioatdma_device *device);
   void __devexit ioat_dma_remove(struct ioatdma_device *device);
   struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
                                               void __iomem *iobase);
@@@ -328,9 -304,4 +329,9 @@@ void ioat_dma_unmap(struct ioat_chan_co
                     size_t len, struct ioat_dma_descriptor *hw);
   bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
                            unsigned long *phys_complete);
+ +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+ +void ioat_kobject_del(struct ioatdma_device *device);
+ +extern struct sysfs_ops ioat_sysfs_ops;
+ +extern struct ioat_sysfs_entry ioat_version_attr;
+ +extern struct ioat_sysfs_entry ioat_cap_attr;
   #endif /* IOATDMA_H */
diff --combined drivers/dma/ioat/dma_v2.c

index 7bbbd83d12e68aa7bca1277e5819f65045b01a25,fa3d6db6624cf31597e9f4099b4560edfc541de9..5d6ac49e0d3258c4a92f48319271329ef2cd1137
--- 1/drivers/dma/ioat/dma_v2.c
--- 2/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@@ -39,7 -39,7 +39,7 @@@
   #include "registers.h"
   #include "hw.h"
   
- -static int ioat_ring_alloc_order = 8;
+ +int ioat_ring_alloc_order = 8;
   module_param(ioat_ring_alloc_order, int, 0644);
   MODULE_PARM_DESC(ioat_ring_alloc_order,
                  "ioat2+: allocate 2^n descriptors per channel (default: n=8)");
@@@ -48,7 -48,7 +48,7 @@@ module_param(ioat_ring_max_alloc_order
   MODULE_PARM_DESC(ioat_ring_max_alloc_order,
                  "ioat2+: upper limit for dynamic ring resizing (default: n=16)");
   
- -static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+ +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
   {
         void * __iomem reg_base = ioat->base.reg_base;
   
@@@ -63,7 -63,7 +63,7 @@@
                 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
   }
   
- -static void ioat2_issue_pending(struct dma_chan *chan)
+ +void ioat2_issue_pending(struct dma_chan *chan)
   {
         struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
   
@@@ -206,7 -206,7 +206,7 @@@ static void ioat2_cleanup(struct ioat2_
         spin_unlock_bh(&chan->cleanup_lock);
   }
   
- -static void ioat2_cleanup_tasklet(unsigned long data)
+ +void ioat2_cleanup_tasklet(unsigned long data)
   {
         struct ioat2_dma_chan *ioat = (void *) data;
   
@@@ -214,7 -214,7 +214,7 @@@
         writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
   }
   
- -static void __restart_chan(struct ioat2_dma_chan *ioat)
+ +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
   {
         struct ioat_chan_common *chan = &ioat->base;
   
@@@ -255,10 -255,12 +255,10 @@@ static void ioat2_restart_channel(struc
         if (ioat_cleanup_preamble(chan, &phys_complete))
                 __cleanup(ioat, phys_complete);
   
- -      __restart_chan(ioat);
+ +      __ioat2_restart_chan(ioat);
   }
   
- -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
- -
- -static void ioat2_timer_event(unsigned long data)
+ +void ioat2_timer_event(unsigned long data)
   {
         struct ioat2_dma_chan *ioat = (void *) data;
         struct ioat_chan_common *chan = &ioat->base;
@@@ -319,7 -321,7 +319,7 @@@
    * ioat2_enumerate_channels - find and initialize the device's channels
    * @device: the device to be enumerated
    */
- -static int ioat2_enumerate_channels(struct ioatdma_device *device)
+ +int ioat2_enumerate_channels(struct ioatdma_device *device)
   {
         struct ioat2_dma_chan *ioat;
         struct device *dev = &device->pdev->dev;
@@@ -352,8 -354,8 +352,8 @@@
                         break;
   
                 ioat_init_channel(device, &ioat->base, i,
- -                                ioat2_timer_event,
- -                                ioat2_cleanup_tasklet,
+ +                                device->timer_fn,
+ +                                device->cleanup_tasklet,
                                   (unsigned long) ioat);
                 ioat->xfercap_log = xfercap_log;
                 spin_lock_init(&ioat->ring_lock);
@@@ -397,11 -399,12 +397,12 @@@ static struct ioat_ring_ent *ioat2_allo
                 return NULL;
         memset(hw, 0, sizeof(*hw));
   
-       desc = kzalloc(sizeof(*desc), flags);
+       desc = kmem_cache_alloc(ioat2_cache, flags);
         if (!desc) {
                 pci_pool_free(dma->dma_pool, hw, phys);
                 return NULL;
         }
+       memset(desc, 0, sizeof(*desc));
   
         dma_async_tx_descriptor_init(&desc->txd, chan);
         desc->txd.tx_submit = ioat2_tx_submit_unlock;
@@@ -416,7 -419,7 +417,7 @@@ static void ioat2_free_ring_ent(struct 
   
         dma = to_ioatdma_device(chan->device);
         pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
-       kfree(desc);
+       kmem_cache_free(ioat2_cache, desc);
   }
   
   static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
@@@ -458,7 -461,7 +459,7 @@@
   /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
    * @chan: channel to be initialized
    */
- -static int ioat2_alloc_chan_resources(struct dma_chan *c)
+ +int ioat2_alloc_chan_resources(struct dma_chan *c)
   {
         struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
         struct ioat_chan_common *chan = &ioat->base;
@@@ -512,7 -515,7 +513,7 @@@
         return 1 << ioat->alloc_order;
   }
   
- -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+ +bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
   {
         /* reshape differs from normal ring allocation in that we want
          * to allocate a new software ring while only
@@@ -625,7 -628,7 +626,7 @@@
    * @ioat: ioat2,3 channel (ring) to operate on
    * @num_descs: allocation length
    */
- -static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+ +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
   {
         struct ioat_chan_common *chan = &ioat->base;
   
@@@ -653,11 -656,9 +654,11 @@@
                 spin_lock_bh(&chan->cleanup_lock);
                 if (jiffies > chan->timer.expires &&
                     timer_pending(&chan->timer)) {
+ +                      struct ioatdma_device *device = chan->device;
+ +
                         mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
                         spin_unlock_bh(&chan->cleanup_lock);
- -                      ioat2_timer_event((unsigned long) ioat);
+ +                      device->timer_fn((unsigned long) ioat);
                 } else
                         spin_unlock_bh(&chan->cleanup_lock);
                 return -ENOMEM;
@@@ -670,7 -671,7 +671,7 @@@
         return 0;  /* with ioat->ring_lock held */
   }
   
- -static struct dma_async_tx_descriptor *
+ +struct dma_async_tx_descriptor *
   ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
                            dma_addr_t dma_src, size_t len, unsigned long flags)
   {
@@@ -710,7 -711,6 +711,7 @@@
         desc->txd.flags = flags;
         desc->len = total_len;
         hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ +      hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
         hw->ctl_f.compl_write = 1;
         dump_desc_dbg(ioat, desc);
         /* we leave the channel locked to ensure in order submission */
@@@ -722,11 -722,11 +723,11 @@@
    * ioat2_free_chan_resources - release all the descriptors
    * @chan: the channel to be cleaned
    */
- -static void ioat2_free_chan_resources(struct dma_chan *c)
+ +void ioat2_free_chan_resources(struct dma_chan *c)
   {
         struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
         struct ioat_chan_common *chan = &ioat->base;
- -      struct ioatdma_device *ioatdma_device = chan->device;
+ +      struct ioatdma_device *device = chan->device;
         struct ioat_ring_ent *desc;
         const u16 total_descs = 1 << ioat->alloc_order;
         int descs;
@@@ -740,7 -740,7 +741,7 @@@
   
         tasklet_disable(&chan->cleanup_task);
         del_timer_sync(&chan->timer);
- -      ioat2_cleanup(ioat);
+ +      device->cleanup_tasklet((unsigned long) ioat);
   
         /* Delay 100ms after reset to allow internal DMA logic to quiesce
          * before removing DMA descriptor resources.
@@@ -770,7 -770,8 +771,7 @@@
         kfree(ioat->ring);
         ioat->ring = NULL;
         ioat->alloc_order = 0;
- -      pci_pool_free(ioatdma_device->completion_pool,
- -                    chan->completion,
+ +      pci_pool_free(device->completion_pool, chan->completion,
                       chan->completion_dma);
         spin_unlock_bh(&ioat->ring_lock);
   
@@@ -780,63 -781,66 +781,63 @@@
         ioat->dmacount = 0;
   }
   
- -static enum dma_status
+ +enum dma_status
   ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
                      dma_cookie_t *done, dma_cookie_t *used)
   {
         struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ +      struct ioatdma_device *device = ioat->base.device;
   
         if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
                 return DMA_SUCCESS;
   
- -      ioat2_cleanup(ioat);
+ +      device->cleanup_tasklet((unsigned long) ioat);
   
         return ioat_is_complete(c, cookie, done, used);
   }
   
- -int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+ +static ssize_t ring_size_show(struct dma_chan *c, char *page)
   {
- -      struct pci_dev *pdev = device->pdev;
- -      struct dma_device *dma;
- -      struct dma_chan *c;
- -      struct ioat_chan_common *chan;
- -      int err;
+ +      struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
   
- -      device->enumerate_channels = ioat2_enumerate_channels;
- -      dma = &device->common;
- -      dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
- -      dma->device_issue_pending = ioat2_issue_pending;
- -      dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
- -      dma->device_free_chan_resources = ioat2_free_chan_resources;
- -      dma->device_is_tx_complete = ioat2_is_complete;
+ +      return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+ +}
+ +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
   
- -      err = ioat_probe(device);
- -      if (err)
- -              return err;
- -      ioat_set_tcp_copy_break(2048);
+ +static ssize_t ring_active_show(struct dma_chan *c, char *page)
+ +{
+ +      struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
   
- -      list_for_each_entry(c, &dma->channels, device_node) {
- -              chan = to_chan_common(c);
- -              writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
- -                     chan->reg_base + IOAT_DCACTRL_OFFSET);
- -      }
+ +      /* ...taken outside the lock, no need to be precise */
+ +      return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+ +}
+ +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
   
- -      err = ioat_register(device);
- -      if (err)
- -              return err;
- -      if (dca)
- -              device->dca = ioat2_dca_init(pdev, device->reg_base);
+ +static struct attribute *ioat2_attrs[] = {
+ +      &ring_size_attr.attr,
+ +      &ring_active_attr.attr,
+ +      &ioat_cap_attr.attr,
+ +      &ioat_version_attr.attr,
+ +      NULL,
+ +};
   
- -      return err;
- -}
+ +struct kobj_type ioat2_ktype = {
+ +      .sysfs_ops = &ioat_sysfs_ops,
+ +      .default_attrs = ioat2_attrs,
+ +};
   
- -int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+ +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
   {
         struct pci_dev *pdev = device->pdev;
         struct dma_device *dma;
         struct dma_chan *c;
         struct ioat_chan_common *chan;
         int err;
- -      u16 dev_id;
   
         device->enumerate_channels = ioat2_enumerate_channels;
+ +      device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ +      device->timer_fn = ioat2_timer_event;
+ +      device->self_test = ioat_dma_self_test;
         dma = &device->common;
         dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
         dma->device_issue_pending = ioat2_issue_pending;
@@@ -844,25 -848,35 +845,25 @@@
         dma->device_free_chan_resources = ioat2_free_chan_resources;
         dma->device_is_tx_complete = ioat2_is_complete;
   
- -      /* -= IOAT ver.3 workarounds =- */
- -      /* Write CHANERRMSK_INT with 3E07h to mask out the errors
- -       * that can cause stability issues for IOAT ver.3
- -       */
- -      pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
- -
- -      /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
- -       * (workaround for spurious config parity error after restart)
- -       */
- -      pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
- -      if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
- -              pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
- -
         err = ioat_probe(device);
         if (err)
                 return err;
- -      ioat_set_tcp_copy_break(262144);
+ +      ioat_set_tcp_copy_break(2048);
   
         list_for_each_entry(c, &dma->channels, device_node) {
                 chan = to_chan_common(c);
- -              writel(IOAT_DMA_DCA_ANY_CPU,
+ +              writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
                        chan->reg_base + IOAT_DCACTRL_OFFSET);
         }
   
         err = ioat_register(device);
         if (err)
                 return err;
+ +
+ +      ioat_kobject_add(device, &ioat2_ktype);
+ +
         if (dca)
- -              device->dca = ioat3_dca_init(pdev, device->reg_base);
+ +              device->dca = ioat2_dca_init(pdev, device->reg_base);
   
         return err;
   }
diff --combined drivers/dma/ioat/dma_v2.h

index 246e646b1904d0a9f402d5ff6cf28a0a35a3b651,ac00adc81974adaf2545259ef1f5ddd7cc7ea7d2..1d849ef74d5f7ff3a14bc7350ce58ce7d42be9c8
--- 1/drivers/dma/ioat/dma_v2.h
--- 2/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@@ -27,7 -27,6 +27,7 @@@
   
   
   extern int ioat_pending_level;
+ +extern int ioat_ring_alloc_order;
   
   /*
    * workaround for IOAT ver.3.0 null descriptor issue
@@@ -115,36 -114,10 +115,36 @@@ static inline u16 ioat2_xferlen_to_desc
         return num_descs;
   }
   
+ +/**
+ + * struct ioat_ring_ent - wrapper around hardware descriptor
+ + * @hw: hardware DMA descriptor (for memcpy)
+ + * @fill: hardware fill descriptor
+ + * @xor: hardware xor descriptor
+ + * @xor_ex: hardware xor extension descriptor
+ + * @pq: hardware pq descriptor
+ + * @pq_ex: hardware pq extension descriptor
+ + * @pqu: hardware pq update descriptor
+ + * @raw: hardware raw (un-typed) descriptor
+ + * @txd: the generic software descriptor for all engines
+ + * @len: total transaction length for unmap
+ + * @result: asynchronous result of validate operations
+ + * @id: identifier for debug
+ + */
+ +
   struct ioat_ring_ent {
- -      struct ioat_dma_descriptor *hw;
+ +      union {
+ +              struct ioat_dma_descriptor *hw;
+ +              struct ioat_fill_descriptor *fill;
+ +              struct ioat_xor_descriptor *xor;
+ +              struct ioat_xor_ext_descriptor *xor_ex;
+ +              struct ioat_pq_descriptor *pq;
+ +              struct ioat_pq_ext_descriptor *pq_ex;
+ +              struct ioat_pq_update_descriptor *pqu;
+ +              struct ioat_raw_descriptor *raw;
+ +      };
-       struct dma_async_tx_descriptor txd;
         size_t len;
+       struct dma_async_tx_descriptor txd;
+ +      enum sum_check_flags *result;
         #ifdef DEBUG
         int id;
         #endif
@@@ -170,20 -143,5 +170,21 @@@ int __devinit ioat2_dma_probe(struct io
   int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
   struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
   struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+ +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+ +int ioat2_enumerate_channels(struct ioatdma_device *device);
+ +struct dma_async_tx_descriptor *
+ +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ +                         dma_addr_t dma_src, size_t len, unsigned long flags);
+ +void ioat2_issue_pending(struct dma_chan *chan);
+ +int ioat2_alloc_chan_resources(struct dma_chan *c);
+ +void ioat2_free_chan_resources(struct dma_chan *c);
+ +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ +                                dma_cookie_t *done, dma_cookie_t *used);
+ +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+ +bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+ +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+ +void ioat2_cleanup_tasklet(unsigned long data);
+ +void ioat2_timer_event(unsigned long data);
+ +extern struct kobj_type ioat2_ktype;
+ extern struct kmem_cache *ioat2_cache;
   #endif /* IOATDMA_V2_H */
diff --combined drivers/dma/ioat/pci.c

index b77d3a2864adbad8cbd31865883003c12f5cd447,61086c6bbf42a5baa8acae3945e8e8a6d8e038c7..c788fa2664708e99caaabe932dead8e3b61f40de
--- 1/drivers/dma/ioat/pci.c
--- 2/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@@ -36,44 -36,30 +36,44 @@@
   #include "hw.h"
   
   MODULE_VERSION(IOAT_DMA_VERSION);
- -MODULE_LICENSE("GPL");
+ +MODULE_LICENSE("Dual BSD/GPL");
   MODULE_AUTHOR("Intel Corporation");
   
   static struct pci_device_id ioat_pci_tbl[] = {
         /* I/OAT v1 platforms */
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+ +      { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
   
         /* I/OAT v2 platforms */
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
   
         /* I/OAT v3 platforms */
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
- -      { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+ +
+ +      /* I/OAT v3.2 platforms */
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+ +      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+ +
         { 0, }
   };
+ +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
   
   static int __devinit ioat_pci_probe(struct pci_dev *pdev,
                                     const struct pci_device_id *id);
@@@ -83,6 -69,8 +83,8 @@@ static int ioat_dca_enabled = 1
   module_param(ioat_dca_enabled, int, 0644);
   MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
   
+ struct kmem_cache *ioat2_cache;
+ 
   #define DRV_NAME "ioatdma"
   
   static struct pci_driver ioat_pci_driver = {
@@@ -182,15 -170,24 +184,27 @@@ static void __devexit ioat_remove(struc
   
   static int __init ioat_init_module(void)
   {
-       return pci_register_driver(&ioat_pci_driver);
+       int err;
+ 
+ +      pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+ +              DRV_NAME, IOAT_DMA_VERSION);
+ +
+       ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+                                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ioat2_cache)
+               return -ENOMEM;
+ 
+       err = pci_register_driver(&ioat_pci_driver);
+       if (err)
+               kmem_cache_destroy(ioat2_cache);
+ 
+       return err;
   }
   module_init(ioat_init_module);
   
   static void __exit ioat_exit_module(void)
   {
         pci_unregister_driver(&ioat_pci_driver);
+       kmem_cache_destroy(ioat2_cache);
   }
   module_exit(ioat_exit_module);
diff --combined drivers/dma/iop-adma.c

index 518f557ef857379298f2c53b31cce521342d5af8,9f6c16f8e2be722ae6bca256d6032169f73591a3..645ca8d54ec43350059bd8d7c8802f017b4a4abd
--- 1/drivers/dma/iop-adma.c
--- 2/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@@ -31,7 -31,6 +31,7 @@@
   #include <linux/platform_device.h>
   #include <linux/memory.h>
   #include <linux/ioport.h>
+ +#include <linux/raid/pq.h>
   
   #include <mach/adma.h>
   
@@@ -58,110 -57,65 +58,110 @@@ static void iop_adma_free_slots(struct 
         }
   }
   
+ +static void
+ +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+ +{
+ +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ +      struct iop_adma_desc_slot *unmap = desc->group_head;
+ +      struct device *dev = &iop_chan->device->pdev->dev;
+ +      u32 len = unmap->unmap_len;
+ +      enum dma_ctrl_flags flags = tx->flags;
+ +      u32 src_cnt;
+ +      dma_addr_t addr;
+ +      dma_addr_t dest;
+ +
+ +      src_cnt = unmap->unmap_src_cnt;
+ +      dest = iop_desc_get_dest_addr(unmap, iop_chan);
+ +      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ +              enum dma_data_direction dir;
+ +
+ +              if (src_cnt > 1) /* is xor? */
+ +                      dir = DMA_BIDIRECTIONAL;
+ +              else
+ +                      dir = DMA_FROM_DEVICE;
+ +
+ +              dma_unmap_page(dev, dest, len, dir);
+ +      }
+ +
+ +      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ +              while (src_cnt--) {
+ +                      addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+ +                      if (addr == dest)
+ +                              continue;
+ +                      dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ +              }
+ +      }
+ +      desc->group_head = NULL;
+ +}
+ +
+ +static void
+ +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+ +{
+ +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ +      struct iop_adma_desc_slot *unmap = desc->group_head;
+ +      struct device *dev = &iop_chan->device->pdev->dev;
+ +      u32 len = unmap->unmap_len;
+ +      enum dma_ctrl_flags flags = tx->flags;
+ +      u32 src_cnt = unmap->unmap_src_cnt;
+ +      dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+ +      dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+ +      int i;
+ +
+ +      if (tx->flags & DMA_PREP_CONTINUE)
+ +              src_cnt -= 3;
+ +
+ +      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+ +              dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+ +              dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+ +      }
+ +
+ +      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ +              dma_addr_t addr;
+ +
+ +              for (i = 0; i < src_cnt; i++) {
+ +                      addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+ +                      dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ +              }
+ +              if (desc->pq_check_result) {
+ +                      dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+ +                      dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+ +              }
+ +      }
+ +
+ +      desc->group_head = NULL;
+ +}
+ +
+ +
   static dma_cookie_t
   iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
         struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
   {
- -      BUG_ON(desc->async_tx.cookie < 0);
- -      if (desc->async_tx.cookie > 0) {
- -              cookie = desc->async_tx.cookie;
- -              desc->async_tx.cookie = 0;
+ +      struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ +
+ +      BUG_ON(tx->cookie < 0);
+ +      if (tx->cookie > 0) {
+ +              cookie = tx->cookie;
+ +              tx->cookie = 0;
   
                 /* call the callback (must not sleep or submit new
                  * operations to this channel)
                  */
- -              if (desc->async_tx.callback)
- -                      desc->async_tx.callback(
- -                              desc->async_tx.callback_param);
+ +              if (tx->callback)
+ +                      tx->callback(tx->callback_param);
   
                 /* unmap dma addresses
                  * (unmap_single vs unmap_page?)
                  */
                 if (desc->group_head && desc->unmap_len) {
- -                      struct iop_adma_desc_slot *unmap = desc->group_head;
- -                      struct device *dev =
- -                              &iop_chan->device->pdev->dev;
- -                      u32 len = unmap->unmap_len;
- -                      enum dma_ctrl_flags flags = desc->async_tx.flags;
- -                      u32 src_cnt;
- -                      dma_addr_t addr;
- -                      dma_addr_t dest;
- -
- -                      src_cnt = unmap->unmap_src_cnt;
- -                      dest = iop_desc_get_dest_addr(unmap, iop_chan);
- -                      if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- -                              enum dma_data_direction dir;
- -
- -                              if (src_cnt > 1) /* is xor? */
- -                                      dir = DMA_BIDIRECTIONAL;
- -                              else
- -                                      dir = DMA_FROM_DEVICE;
- -
- -                              dma_unmap_page(dev, dest, len, dir);
- -                      }
- -
- -                      if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- -                              while (src_cnt--) {
- -                                      addr = iop_desc_get_src_addr(unmap,
- -                                                                   iop_chan,
- -                                                                   src_cnt);
- -                                      if (addr == dest)
- -                                              continue;
- -                                      dma_unmap_page(dev, addr, len,
- -                                                     DMA_TO_DEVICE);
- -                              }
- -                      }
- -                      desc->group_head = NULL;
+ +                      if (iop_desc_is_pq(desc))
+ +                              iop_desc_unmap_pq(iop_chan, desc);
+ +                      else
+ +                              iop_desc_unmap(iop_chan, desc);
                 }
         }
   
         /* run dependent operations */
- -      dma_run_dependencies(&desc->async_tx);
+ +      dma_run_dependencies(tx);
   
         return cookie;
   }
@@@ -333,12 -287,7 +333,12 @@@ static void iop_adma_tasklet(unsigned l
   {
         struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
   
- -      spin_lock(&iop_chan->lock);
+ +      /* lockdep will flag dependency submissions as potentially
+ +       * recursive locking, this is not the case as a dependency
+ +       * submission will never recurse a channel's submit routine.
+ +       * There are checks in async_tx.c to prevent this.
+ +       */
+ +      spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
         __iop_adma_slot_cleanup(iop_chan);
         spin_unlock(&iop_chan->lock);
   }
@@@ -421,7 -370,7 +421,7 @@@ retry
                         }
                         alloc_tail->group_head = alloc_start;
                         alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                         iop_chan->last_used = last_used;
                         iop_desc_clear_next_desc(alloc_start);
                         iop_desc_clear_next_desc(alloc_tail);
@@@ -480,7 -429,7 +480,7 @@@ iop_adma_tx_submit(struct dma_async_tx_
   
         old_chain_tail = list_entry(iop_chan->chain.prev,
                 struct iop_adma_desc_slot, chain_node);
-       list_splice_init(&sw_desc->async_tx.tx_list,
+       list_splice_init(&sw_desc->tx_list,
                          &old_chain_tail->chain_node);
   
         /* fix up the hardware chain */
@@@ -547,6 -496,7 +547,7 @@@ static int iop_adma_alloc_chan_resource
   
                 dma_async_tx_descriptor_init(&slot->async_tx, chan);
                 slot->async_tx.tx_submit = iop_adma_tx_submit;
+               INIT_LIST_HEAD(&slot->tx_list);
                 INIT_LIST_HEAD(&slot->chain_node);
                 INIT_LIST_HEAD(&slot->slot_node);
                 hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@@ -711,9 -661,9 +712,9 @@@ iop_adma_prep_dma_xor(struct dma_chan *
   }
   
   static struct dma_async_tx_descriptor *
- -iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
- -                         unsigned int src_cnt, size_t len, u32 *result,
- -                         unsigned long flags)
+ +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+ +                        unsigned int src_cnt, size_t len, u32 *result,
+ +                        unsigned long flags)
   {
         struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
         struct iop_adma_desc_slot *sw_desc, *grp_start;
@@@ -747,118 -697,6 +748,118 @@@
         return sw_desc ? &sw_desc->async_tx : NULL;
   }
   
+ +static struct dma_async_tx_descriptor *
+ +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ +                   unsigned int src_cnt, const unsigned char *scf, size_t len,
+ +                   unsigned long flags)
+ +{
+ +      struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ +      struct iop_adma_desc_slot *sw_desc, *g;
+ +      int slot_cnt, slots_per_op;
+ +      int continue_srcs;
+ +
+ +      if (unlikely(!len))
+ +              return NULL;
+ +      BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+ +
+ +      dev_dbg(iop_chan->device->common.dev,
+ +              "%s src_cnt: %d len: %u flags: %lx\n",
+ +              __func__, src_cnt, len, flags);
+ +
+ +      if (dmaf_p_disabled_continue(flags))
+ +              continue_srcs = 1+src_cnt;
+ +      else if (dmaf_continue(flags))
+ +              continue_srcs = 3+src_cnt;
+ +      else
+ +              continue_srcs = 0+src_cnt;
+ +
+ +      spin_lock_bh(&iop_chan->lock);
+ +      slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+ +      sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ +      if (sw_desc) {
+ +              int i;
+ +
+ +              g = sw_desc->group_head;
+ +              iop_desc_set_byte_count(g, iop_chan, len);
+ +
+ +              /* even if P is disabled its destination address (bits
+ +               * [3:0]) must match Q.  It is ok if P points to an
+ +               * invalid address, it won't be written.
+ +               */
+ +              if (flags & DMA_PREP_PQ_DISABLE_P)
+ +                      dst[0] = dst[1] & 0x7;
+ +
+ +              iop_desc_set_pq_addr(g, dst);
+ +              sw_desc->unmap_src_cnt = src_cnt;
+ +              sw_desc->unmap_len = len;
+ +              sw_desc->async_tx.flags = flags;
+ +              for (i = 0; i < src_cnt; i++)
+ +                      iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+ +
+ +              /* if we are continuing a previous operation factor in
+ +               * the old p and q values, see the comment for dma_maxpq
+ +               * in include/linux/dmaengine.h
+ +               */
+ +              if (dmaf_p_disabled_continue(flags))
+ +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ +              else if (dmaf_continue(flags)) {
+ +                      iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+ +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ +                      iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+ +              }
+ +              iop_desc_init_pq(g, i, flags);
+ +      }
+ +      spin_unlock_bh(&iop_chan->lock);
+ +
+ +      return sw_desc ? &sw_desc->async_tx : NULL;
+ +}
+ +
+ +static struct dma_async_tx_descriptor *
+ +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ +                       unsigned int src_cnt, const unsigned char *scf,
+ +                       size_t len, enum sum_check_flags *pqres,
+ +                       unsigned long flags)
+ +{
+ +      struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ +      struct iop_adma_desc_slot *sw_desc, *g;
+ +      int slot_cnt, slots_per_op;
+ +
+ +      if (unlikely(!len))
+ +              return NULL;
+ +      BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+ +
+ +      dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+ +              __func__, src_cnt, len);
+ +
+ +      spin_lock_bh(&iop_chan->lock);
+ +      slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+ +      sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ +      if (sw_desc) {
+ +              /* for validate operations p and q are tagged onto the
+ +               * end of the source list
+ +               */
+ +              int pq_idx = src_cnt;
+ +
+ +              g = sw_desc->group_head;
+ +              iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+ +              iop_desc_set_pq_zero_sum_byte_count(g, len);
+ +              g->pq_check_result = pqres;
+ +              pr_debug("\t%s: g->pq_check_result: %p\n",
+ +                      __func__, g->pq_check_result);
+ +              sw_desc->unmap_src_cnt = src_cnt+2;
+ +              sw_desc->unmap_len = len;
+ +              sw_desc->async_tx.flags = flags;
+ +              while (src_cnt--)
+ +                      iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+ +                                                        src[src_cnt],
+ +                                                        scf[src_cnt]);
+ +              iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+ +      }
+ +      spin_unlock_bh(&iop_chan->lock);
+ +
+ +      return sw_desc ? &sw_desc->async_tx : NULL;
+ +}
+ +
   static void iop_adma_free_chan_resources(struct dma_chan *chan)
   {
         struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@@ -1069,7 -907,7 +1070,7 @@@ out
   
   #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
   static int __devinit
- -iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+ +iop_adma_xor_val_self_test(struct iop_adma_device *device)
   {
         int i, src_idx;
         struct page *dest;
@@@ -1165,7 -1003,7 +1166,7 @@@
                 PAGE_SIZE, DMA_TO_DEVICE);
   
         /* skip zero sum if the capability is not present */
- -      if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+ +      if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
                 goto free_resources;
   
         /* zero sum the sources with the destination page */
@@@ -1179,10 -1017,10 +1180,10 @@@
                 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                            zero_sum_srcs[i], 0, PAGE_SIZE,
                                            DMA_TO_DEVICE);
- -      tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- -                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- -                                      &zero_sum_result,
- -                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ +      tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ +                                     IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ +                                     &zero_sum_result,
+ +                                     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
   
         cookie = iop_adma_tx_submit(tx);
         iop_adma_issue_pending(dma_chan);
@@@ -1235,10 -1073,10 +1236,10 @@@
                 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                            zero_sum_srcs[i], 0, PAGE_SIZE,
                                            DMA_TO_DEVICE);
- -      tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
- -                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
- -                                      &zero_sum_result,
- -                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ +      tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ +                                     IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ +                                     &zero_sum_result,
+ +                                     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
   
         cookie = iop_adma_tx_submit(tx);
         iop_adma_issue_pending(dma_chan);
@@@ -1268,170 -1106,6 +1269,170 @@@ out
         return err;
   }
   
+ +#ifdef CONFIG_MD_RAID6_PQ
+ +/**
+ + * iop_adma_pq_zero_sum_self_test - exercise pq generation and validation
+ + * @device: adma device; the first channel on its channel list is tested
+ + *
+ + * Generates P and Q for IOP_ADMA_NUM_SRC_TEST source pages in hardware
+ + * and compares them with the output of raid6_call.gen_syndrome().  The
+ + * validate operation is then run twice: once against the software
+ + * generated P/Q pages (the result must be zero) and once after both
+ + * pages have been partially cleared (the result must report both
+ + * SUM_CHECK_P_RESULT and SUM_CHECK_Q_RESULT).
+ + *
+ + * Returns 0 on success, -ENOMEM if the test pages cannot be allocated,
+ + * or -ENODEV if channel resources are unavailable, an operation cannot
+ + * be prepared, an operation does not complete in time, or a result does
+ + * not match.
+ + */
+ +static int __devinit
+ +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+ +{
+ +      /* combined sources, software pq results, and extra hw pq results */
+ +      struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+ +      /* ptr to the extra hw pq buffers defined above */
+ +      struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+ +      /* address conversion buffers (dma_map / page_address) */
+ +      void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+ +      /* the validate passes map the sources plus p and q, so this
+ +       * array needs IOP_ADMA_NUM_SRC_TEST+2 entries
+ +       */
+ +      dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
+ +      dma_addr_t pq_dest[2];
+ +
+ +      int i;
+ +      struct dma_async_tx_descriptor *tx;
+ +      struct dma_chan *dma_chan;
+ +      dma_cookie_t cookie;
+ +      u32 zero_sum_result;
+ +      int err = 0;
+ +      struct device *dev;
+ +
+ +      dev_dbg(device->common.dev, "%s\n", __func__);
+ +
+ +      for (i = 0; i < ARRAY_SIZE(pq); i++) {
+ +              pq[i] = alloc_page(GFP_KERNEL);
+ +              if (!pq[i]) {
+ +                      /* release the pages allocated so far */
+ +                      while (i--)
+ +                              __free_page(pq[i]);
+ +                      return -ENOMEM;
+ +              }
+ +      }
+ +
+ +      /* Fill in src buffers */
+ +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+ +              pq_sw[i] = page_address(pq[i]);
+ +              /* memset() only uses the low byte of the fill value
+ +               * (0x11, 0x22, 0x44, 0x88 for the four default sources);
+ +               * shifting 0x11 avoids overflowing a signed int
+ +               */
+ +              memset(pq_sw[i], 0x11 << i, PAGE_SIZE);
+ +      }
+ +      /* i == IOP_ADMA_NUM_SRC_TEST: the software p and q pages */
+ +      pq_sw[i] = page_address(pq[i]);
+ +      pq_sw[i+1] = page_address(pq[i+1]);
+ +
+ +      dma_chan = container_of(device->common.channels.next,
+ +                              struct dma_chan,
+ +                              device_node);
+ +      if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ +              err = -ENODEV;
+ +              goto out;
+ +      }
+ +
+ +      dev = dma_chan->device->dev;
+ +
+ +      /* initialize the dests */
+ +      memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+ +      memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+ +
+ +      /* test pq */
+ +      pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ +      pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ +                                       DMA_TO_DEVICE);
+ +
+ +      tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+ +                                IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+ +                                PAGE_SIZE,
+ +                                DMA_PREP_INTERRUPT |
+ +                                DMA_CTRL_ACK);
+ +      /* do not submit a NULL descriptor */
+ +      if (!tx) {
+ +              dev_err(dev, "Self-test pq prep failed, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      cookie = iop_adma_tx_submit(tx);
+ +      iop_adma_issue_pending(dma_chan);
+ +      msleep(8);
+ +
+ +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ +              DMA_SUCCESS) {
+ +              dev_err(dev, "Self-test pq timed out, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      /* software reference: fills pq_sw[IOP_ADMA_NUM_SRC_TEST] (p) and
+ +       * pq_sw[IOP_ADMA_NUM_SRC_TEST+1] (q), compared against hw below
+ +       */
+ +      raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+ +
+ +      if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+ +                 page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+ +              dev_err(dev, "Self-test p failed compare, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +      if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+ +                 page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+ +              dev_err(dev, "Self-test q failed compare, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      /* test correct zero sum using the software generated pq values */
+ +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ +                                       DMA_TO_DEVICE);
+ +
+ +      /* preset to non-zero so a result that is never written fails */
+ +      zero_sum_result = ~0;
+ +      tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ +                                    pq_src, IOP_ADMA_NUM_SRC_TEST,
+ +                                    raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ +                                    DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+ +      if (!tx) {
+ +              dev_err(dev, "Self-test pq-zero-sum prep failed, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      cookie = iop_adma_tx_submit(tx);
+ +      iop_adma_issue_pending(dma_chan);
+ +      msleep(8);
+ +
+ +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ +              DMA_SUCCESS) {
+ +              dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      if (zero_sum_result != 0) {
+ +              dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+ +                      zero_sum_result);
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      /* test incorrect zero sum: corrupt part of the p and q pages */
+ +      i = IOP_ADMA_NUM_SRC_TEST;
+ +      memset(pq_sw[i] + 100, 0, 100);
+ +      memset(pq_sw[i+1] + 200, 0, 200);
+ +      for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ +              pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ +                                       DMA_TO_DEVICE);
+ +
+ +      /* preset to zero so a result that is never written fails */
+ +      zero_sum_result = 0;
+ +      tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ +                                    pq_src, IOP_ADMA_NUM_SRC_TEST,
+ +                                    raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ +                                    DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+ +      if (!tx) {
+ +              dev_err(dev, "Self-test !pq-zero-sum prep failed, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      cookie = iop_adma_tx_submit(tx);
+ +      iop_adma_issue_pending(dma_chan);
+ +      msleep(8);
+ +
+ +      if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ +              DMA_SUCCESS) {
+ +              dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +      if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+ +              dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+ +                      zero_sum_result);
+ +              err = -ENODEV;
+ +              goto free_resources;
+ +      }
+ +
+ +free_resources:
+ +      iop_adma_free_chan_resources(dma_chan);
+ +out:
+ +      /* every page was allocated by the time either label is reached */
+ +      i = ARRAY_SIZE(pq);
+ +      while (i--)
+ +              __free_page(pq[i]);
+ +      return err;
+ +}
+ +#endif
+ +
   static int __devexit iop_adma_remove(struct platform_device *dev)
   {
         struct iop_adma_device *device = platform_get_drvdata(dev);
@@@ -1519,16 -1193,9 +1520,16 @@@ static int __devinit iop_adma_probe(str
                 dma_dev->max_xor = iop_adma_get_max_xor();
                 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
         }
- -      if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
- -              dma_dev->device_prep_dma_zero_sum =
- -                      iop_adma_prep_dma_zero_sum;
+ +      if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+ +              dma_dev->device_prep_dma_xor_val =
+ +                      iop_adma_prep_dma_xor_val;
+ +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ +              dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+ +              dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+ +      }
+ +      if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+ +              dma_dev->device_prep_dma_pq_val =
+ +                      iop_adma_prep_dma_pq_val;
         if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
                 dma_dev->device_prep_dma_interrupt =
                         iop_adma_prep_dma_interrupt;
@@@ -1582,35 -1249,23 +1583,35 @@@
         }
   
         if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
- -              dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
- -              ret = iop_adma_xor_zero_sum_self_test(adev);
+ +          dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ +              ret = iop_adma_xor_val_self_test(adev);
                 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
                 if (ret)
                         goto err_free_iop_chan;
         }
   
+ +      if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+ +          dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+ +              #ifdef CONFIG_MD_RAID6_PQ
+ +              ret = iop_adma_pq_zero_sum_self_test(adev);
+ +              dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+ +              #else
+ +              /* can not test raid6, so do not publish capability */
+ +              dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+ +              dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+ +              ret = 0;
+ +              #endif
+ +              if (ret)
+ +                      goto err_free_iop_chan;
+ +      }
+ +
         dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
- -        "( %s%s%s%s%s%s%s%s%s%s)\n",
- -        dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
- -        dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
- -        dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+ +        "( %s%s%s%s%s%s%s)\n",
+ +        dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+ +        dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
           dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
- -        dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
- -        dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+ +        dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
           dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
- -        dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
           dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
           dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
   
@@@ -1642,7 -1297,7 +1643,7 @@@ static void iop_chan_start_null_memcpy(
         if (sw_desc) {
                 grp_start = sw_desc->group_head;
   
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                 async_tx_ack(&sw_desc->async_tx);
                 iop_desc_init_memcpy(grp_start, 0);
                 iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@@ -1698,7 -1353,7 +1699,7 @@@ static void iop_chan_start_null_xor(str
         sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
         if (sw_desc) {
                 grp_start = sw_desc->group_head;
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                 async_tx_ack(&sw_desc->async_tx);
                 iop_desc_init_null_xor(grp_start, 2, 0);
                 iop_desc_set_byte_count(grp_start, iop_chan, 0);
diff --combined drivers/md/Kconfig

index 09c0c6e49ab596f2e5be99bb0fe4bc10bc4b367d,020f9573fd82011babb4ad666a2966a44d088aba..2158377a13593a45938278ac860d5de3db8a06fa
--- 1/drivers/md/Kconfig
--- 2/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@@ -124,8 -124,6 +124,8 @@@ config MD_RAID45
         select MD_RAID6_PQ
         select ASYNC_MEMCPY
         select ASYNC_XOR
+ +      select ASYNC_PQ
+ +      select ASYNC_RAID6_RECOV
         ---help---
           A RAID-5 set of N drives with a capacity of C MB per drive provides
           the capacity of C * (N - 1) MB, and protects against a failure
@@@ -154,33 -152,9 +154,33 @@@
   
           If unsure, say Y.
   
+ +config MULTICORE_RAID456
+ +      bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
+ +      depends on MD_RAID456
+ +      depends on SMP
+ +      depends on EXPERIMENTAL
+ +      ---help---
+ +        Enable the raid456 module to dispatch per-stripe raid operations to a
+ +        thread pool.
+ +
+ +        If unsure, say N.
+ +
   config MD_RAID6_PQ
         tristate
   
+ +config ASYNC_RAID6_TEST
+ +      tristate "Self test for hardware accelerated raid6 recovery"
+ +      depends on MD_RAID6_PQ
+ +      select ASYNC_RAID6_RECOV
+ +      ---help---
+ +        This is a one-shot self test that permutes through the
+ +        recovery of all the possible two-disk failure scenarios for an
+ +        N-disk array.  Recovery is performed with the asynchronous
+ +        raid6 recovery routines, and will optionally use an offload
+ +        engine if one is available.
+ +
+ +        If unsure, say N.
+ +
   config MD_MULTIPATH
         tristate "Multipath I/O support"
         depends on BLK_DEV_MD
@@@ -257,6 -231,17 +257,17 @@@ config DM_MIRRO
            Allow volume managers to mirror logical volumes, also
            needed for live data migration tools such as 'pvmove'.
   
+ config DM_LOG_USERSPACE
+       tristate "Mirror userspace logging (EXPERIMENTAL)"
+       depends on DM_MIRROR && EXPERIMENTAL && NET
+       select CONNECTOR
+       ---help---
+         The userspace logging module provides a mechanism for
+         relaying the dm-dirty-log API to userspace.  Log designs
+         which are more suited to userspace implementation (e.g.
+         shared storage logs) or experimental logs can be implemented
+         by leveraging this framework.
+ 
   config DM_ZERO
         tristate "Zero target"
         depends on BLK_DEV_DM
@@@ -275,6 -260,25 +286,25 @@@ config DM_MULTIPAT
         ---help---
           Allow volume managers to support multipath hardware.
   
+ config DM_MULTIPATH_QL
+       tristate "I/O Path Selector based on the number of in-flight I/Os"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path with the least number of in-flight I/Os.
+ 
+         If unsure, say N.
+ 
+ config DM_MULTIPATH_ST
+       tristate "I/O Path Selector based on the service time"
+       depends on DM_MULTIPATH
+       ---help---
+         This path selector is a dynamic load balancer which selects
+         the path expected to complete the incoming I/O in the shortest
+         time.
+ 
+         If unsure, say N.
+ 
   config DM_DELAY
         tristate "I/O delaying target (EXPERIMENTAL)"
         depends on BLK_DEV_DM && EXPERIMENTAL
diff --combined drivers/md/raid5.c

index 54ef8d75541d3cc8607e37b0a128305390f3dff5,f9f991e6e1389ffc90860571edf701ca13a6f292..cac6f4d3a14310682f72cc5e2468547519c52b6c
--- 1/drivers/md/raid5.c
--- 2/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@@ -47,9 -47,7 +47,9 @@@
   #include <linux/kthread.h>
   #include <linux/raid/pq.h>
   #include <linux/async_tx.h>
+ +#include <linux/async.h>
   #include <linux/seq_file.h>
+ +#include <linux/cpu.h>
   #include "md.h"
   #include "raid5.h"
   #include "bitmap.h"
@@@ -501,18 -499,11 +501,18 @@@ async_copy_data(int frombio, struct bi
         struct page *bio_page;
         int i;
         int page_offset;
+ +      struct async_submit_ctl submit;
+ +      enum async_tx_flags flags = 0;
   
         if (bio->bi_sector >= sector)
                 page_offset = (signed)(bio->bi_sector - sector) * 512;
         else
                 page_offset = (signed)(sector - bio->bi_sector) * -512;
+ +
+ +      if (frombio)
+ +              flags |= ASYNC_TX_FENCE;
+ +      init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+ +
         bio_for_each_segment(bvl, bio, i) {
                 int len = bio_iovec_idx(bio, i)->bv_len;
                 int clen;
@@@ -534,14 -525,15 +534,14 @@@
                         bio_page = bio_iovec_idx(bio, i)->bv_page;
                         if (frombio)
                                 tx = async_memcpy(page, bio_page, page_offset,
- -                                      b_offset, clen,
- -                                      ASYNC_TX_DEP_ACK,
- -                                      tx, NULL, NULL);
+ +                                                b_offset, clen, &submit);
                         else
                                 tx = async_memcpy(bio_page, page, b_offset,
- -                                      page_offset, clen,
- -                                      ASYNC_TX_DEP_ACK,
- -                                      tx, NULL, NULL);
+ +                                                page_offset, clen, &submit);
                 }
+ +              /* chain the operations */
+ +              submit.depend_tx = tx;
+ +
                 if (clen < len) /* hit end of page */
                         break;
                 page_offset +=  len;
@@@ -600,7 -592,6 +600,7 @@@ static void ops_run_biofill(struct stri
   {
         struct dma_async_tx_descriptor *tx = NULL;
         raid5_conf_t *conf = sh->raid_conf;
+ +      struct async_submit_ctl submit;
         int i;
   
         pr_debug("%s: stripe %llu\n", __func__,
@@@ -624,34 -615,22 +624,34 @@@
         }
   
         atomic_inc(&sh->count);
- -      async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- -              ops_complete_biofill, sh);
+ +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+ +      async_trigger_callback(&submit);
   }
   
- -static void ops_complete_compute5(void *stripe_head_ref)
+ +static void mark_target_uptodate(struct stripe_head *sh, int target)
   {
- -      struct stripe_head *sh = stripe_head_ref;
- -      int target = sh->ops.target;
- -      struct r5dev *tgt = &sh->dev[target];
+ +      struct r5dev *tgt;
   
- -      pr_debug("%s: stripe %llu\n", __func__,
- -              (unsigned long long)sh->sector);
+ +      if (target < 0)
+ +              return;
   
+ +      tgt = &sh->dev[target];
         set_bit(R5_UPTODATE, &tgt->flags);
         BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
         clear_bit(R5_Wantcompute, &tgt->flags);
+ +}
+ +
+ +static void ops_complete_compute(void *stripe_head_ref)
+ +{
+ +      struct stripe_head *sh = stripe_head_ref;
+ +
+ +      pr_debug("%s: stripe %llu\n", __func__,
+ +              (unsigned long long)sh->sector);
+ +
+ +      /* mark the computed target(s) as uptodate */
+ +      mark_target_uptodate(sh, sh->ops.target);
+ +      mark_target_uptodate(sh, sh->ops.target2);
+ +
         clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
         if (sh->check_state == check_state_compute_run)
                 sh->check_state = check_state_compute_result;
@@@ -659,24 -638,16 +659,24 @@@
         release_stripe(sh);
   }
   
- -static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+ +/* locate the address conversion region of the scribble buffer: it
+ + * starts (sh->disks + 2) struct page pointers past the buffer's base
+ + */
+ +static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+ +                               struct raid5_percpu *percpu)
+ +{
+ +      size_t page_ptr_bytes = sizeof(struct page *) * (sh->disks + 2);
+ +
+ +      return percpu->scribble + page_ptr_bytes;
+ +}
+ +
+ +static struct dma_async_tx_descriptor *
+ +ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
   {
- -      /* kernel stack size limits the total number of disks */
         int disks = sh->disks;
- -      struct page *xor_srcs[disks];
+ +      struct page **xor_srcs = percpu->scribble;
         int target = sh->ops.target;
         struct r5dev *tgt = &sh->dev[target];
         struct page *xor_dest = tgt->page;
         int count = 0;
         struct dma_async_tx_descriptor *tx;
+ +      struct async_submit_ctl submit;
         int i;
   
         pr_debug("%s: stripe %llu block: %d\n",
@@@ -689,212 -660,17 +689,212 @@@
   
         atomic_inc(&sh->count);
   
+ +      init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+ +                        ops_complete_compute, sh, to_addr_conv(sh, percpu));
         if (unlikely(count == 1))
- -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- -                      0, NULL, ops_complete_compute5, sh);
+ +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ +      else
+ +              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+ +
+ +      return tx;
+ +}
+ +
+ +/* set_syndrome_sources - lay out a stripe's pages for gen_syndrome
+ + * @srcs - (struct page *) array of size sh->disks
+ + * @sh - stripe_head to parse
+ + *
+ + * Every entry of srcs is first pointed at raid6_empty_zero_page; each
+ + * device page is then stored at the slot raid6_idx_to_slot() assigns
+ + * to it, walking the devices from raid6_d0() with raid6_next_disk().
+ + * Returns the 'count' of sources to be used in a call to
+ + * async_gen_syndrome.  The P destination buffer is recorded in
+ + * srcs[count] and the Q destination is recorded in srcs[count+1].
+ + */
+ +static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+ +{
+ +      const int disks = sh->disks;
+ +      const int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
+ +      const int first = raid6_d0(sh);
+ +      int count = 0;
+ +      int idx;
+ +
+ +      /* default every slot to the shared zero page */
+ +      for (idx = disks; idx--; )
+ +              srcs[idx] = (void *)raid6_empty_zero_page;
+ +
+ +      /* one full lap over the devices, starting and ending at 'first' */
+ +      for (idx = first; ; ) {
+ +              int slot = raid6_idx_to_slot(idx, sh, &count, syndrome_disks);
+ +
+ +              srcs[slot] = sh->dev[idx].page;
+ +              idx = raid6_next_disk(idx, disks);
+ +              if (idx == first)
+ +                      break;
+ +      }
+ +      BUG_ON(count != syndrome_disks);
+ +
+ +      return count;
+ +}
+ +
+ +static struct dma_async_tx_descriptor *
+ +ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
+ +{
+ +      int disks = sh->disks;
+ +      struct page **blocks = percpu->scribble;
+ +      int target;
+ +      int qd_idx = sh->qd_idx;
+ +      struct dma_async_tx_descriptor *tx;
+ +      struct async_submit_ctl submit;
+ +      struct r5dev *tgt;
+ +      struct page *dest;
+ +      int i;
+ +      int count;
+ +
+ +      if (sh->ops.target < 0)
+ +              target = sh->ops.target2;
+ +      else if (sh->ops.target2 < 0)
+ +              target = sh->ops.target;
         else
- -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- -                      ASYNC_TX_XOR_ZERO_DST, NULL,
- -                      ops_complete_compute5, sh);
+ +              /* we should only have one valid target */
+ +              BUG();
+ +      BUG_ON(target < 0);
+ +      pr_debug("%s: stripe %llu block: %d\n",
+ +              __func__, (unsigned long long)sh->sector, target);
+ +
+ +      tgt = &sh->dev[target];
+ +      BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ +      dest = tgt->page;
+ +
+ +      atomic_inc(&sh->count);
+ +
+ +      if (target == qd_idx) {
+ +              count = set_syndrome_sources(blocks, sh);
+ +              blocks[count] = NULL; /* regenerating p is not necessary */
+ +              BUG_ON(blocks[count+1] != dest); /* q should already be set */
+ +              init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ +                                ops_complete_compute, sh,
+ +                                to_addr_conv(sh, percpu));
+ +              tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ +      } else {
+ +              /* Compute any data- or p-drive using XOR */
+ +              count = 0;
+ +              for (i = disks; i-- ; ) {
+ +                      if (i == target || i == qd_idx)
+ +                              continue;
+ +                      blocks[count++] = sh->dev[i].page;
+ +              }
+ +
+ +              init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ +                                NULL, ops_complete_compute, sh,
+ +                                to_addr_conv(sh, percpu));
+ +              tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+ +      }
   
         return tx;
   }
   
+ +static struct dma_async_tx_descriptor *
+ +ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
+ +{
+ +      int i, count, disks = sh->disks;
+ +      int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+ +      int d0_idx = raid6_d0(sh);
+ +      int faila = -1, failb = -1;
+ +      int target = sh->ops.target;
+ +      int target2 = sh->ops.target2;
+ +      struct r5dev *tgt = &sh->dev[target];
+ +      struct r5dev *tgt2 = &sh->dev[target2];
+ +      struct dma_async_tx_descriptor *tx;
+ +      struct page **blocks = percpu->scribble;
+ +      struct async_submit_ctl submit;
+ +
+ +      pr_debug("%s: stripe %llu block1: %d block2: %d\n",
+ +               __func__, (unsigned long long)sh->sector, target, target2);
+ +      BUG_ON(target < 0 || target2 < 0);
+ +      BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+ +      BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
+ +
+ +      /* This routine computes the two blocks named by sh->ops.target
+ +       * and sh->ops.target2 (both must be flagged R5_Wantcompute) and
+ +       * returns the descriptor of the last operation it submits; that
+ +       * operation carries ops_complete_compute as its callback.
+ +       *
+ +       * we need to open-code set_syndrome_sources to handle the
+ +       * slot number conversion for 'faila' and 'failb'
+ +       */
+ +      for (i = 0; i < disks ; i++)
+ +              blocks[i] = (void *)raid6_empty_zero_page;
+ +      count = 0;
+ +      i = d0_idx;
+ +      do {
+ +              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+ +
+ +              blocks[slot] = sh->dev[i].page;
+ +
+ +              if (i == target)
+ +                      faila = slot;
+ +              if (i == target2)
+ +                      failb = slot;
+ +              i = raid6_next_disk(i, disks);
+ +      } while (i != d0_idx);
+ +      BUG_ON(count != syndrome_disks);
+ +
+ +      BUG_ON(faila == failb);
+ +      if (failb < faila)
+ +              swap(faila, failb); /* order the slots so faila < failb */
+ +      pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
+ +               __func__, (unsigned long long)sh->sector, faila, failb);
+ +
+ +      atomic_inc(&sh->count);
+ +
+ +      if (failb == syndrome_disks+1) {
+ +              /* Q disk is one of the missing disks
+ +               * (the branches below treat slot syndrome_disks as P and
+ +               * slot syndrome_disks+1 as Q)
+ +               */
+ +              if (faila == syndrome_disks) {
+ +                      /* Missing P+Q, just recompute
+ +                       * (a single async_gen_syndrome call over blocks)
+ +                       */
+ +                      init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ +                                        ops_complete_compute, sh,
+ +                                        to_addr_conv(sh, percpu));
+ +                      return async_gen_syndrome(blocks, 0, count+2,
+ +                                                STRIPE_SIZE, &submit);
+ +              } else {
+ +                      struct page *dest;
+ +                      int data_target;
+ +                      int qd_idx = sh->qd_idx;
+ +
+ +                      /* Missing D+Q: recompute D from P, then recompute Q
+ +                       *
+ +                       * D is the xor of every page except D itself and
+ +                       * Q; that xor is submitted without a callback and
+ +                       * its descriptor (tx) is handed to the second
+ +                       * submit so the syndrome generation follows it
+ +                       */
+ +                      if (target == qd_idx)
+ +                              data_target = target2;
+ +                      else
+ +                              data_target = target;
+ +
+ +                      count = 0;
+ +                      for (i = disks; i-- ; ) {
+ +                              if (i == data_target || i == qd_idx)
+ +                                      continue;
+ +                              blocks[count++] = sh->dev[i].page;
+ +                      }
+ +                      dest = sh->dev[data_target].page;
+ +                      init_async_submit(&submit,
+ +                                        ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ +                                        NULL, NULL, NULL,
+ +                                        to_addr_conv(sh, percpu));
+ +                      tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+ +                                     &submit);
+ +
+ +                      count = set_syndrome_sources(blocks, sh); /* blocks is rebuilt in syndrome order */
+ +                      init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+ +                                        ops_complete_compute, sh,
+ +                                        to_addr_conv(sh, percpu));
+ +                      return async_gen_syndrome(blocks, 0, count+2,
+ +                                                STRIPE_SIZE, &submit);
+ +              }
+ +      }
+ +
+ +      init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
+ +                        sh, to_addr_conv(sh, percpu));
+ +      if (failb == syndrome_disks) {
+ +              /* We're missing D+P. (faila is the data slot) */
+ +              return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
+ +                                             faila, blocks, &submit);
+ +      } else {
+ +              /* We're missing D+D. (faila and failb are both data slots) */
+ +              return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE,
+ +                                             faila, failb, blocks, &submit);
+ +      }
+ +}
+ +
+ +
   static void ops_complete_prexor(void *stripe_head_ref)
   {
         struct stripe_head *sh = stripe_head_ref;
@@@ -904,13 -680,12 +904,13 @@@
   }
   
   static struct dma_async_tx_descriptor *
- -ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ +ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+ +             struct dma_async_tx_descriptor *tx)
   {
- -      /* kernel stack size limits the total number of disks */
         int disks = sh->disks;
- -      struct page *xor_srcs[disks];
+ +      struct page **xor_srcs = percpu->scribble;
         int count = 0, pd_idx = sh->pd_idx, i;
+ +      struct async_submit_ctl submit;
   
         /* existing parity data subtracted */
         struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@@ -925,9 -700,9 +925,9 @@@
                         xor_srcs[count++] = dev->page;
         }
   
- -      tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- -              ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
- -              ops_complete_prexor, sh);
+ +      init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ +                        ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+ +      tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
   
         return tx;
   }
@@@ -967,21 -742,17 +967,21 @@@ ops_run_biodrain(struct stripe_head *sh
         return tx;
   }
   
- -static void ops_complete_postxor(void *stripe_head_ref)
+ +static void ops_complete_reconstruct(void *stripe_head_ref)
   {
         struct stripe_head *sh = stripe_head_ref;
- -      int disks = sh->disks, i, pd_idx = sh->pd_idx;
+ +      int disks = sh->disks;
+ +      int pd_idx = sh->pd_idx;
+ +      int qd_idx = sh->qd_idx;
+ +      int i;
   
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
   
         for (i = disks; i--; ) {
                 struct r5dev *dev = &sh->dev[i];
- -              if (dev->written || i == pd_idx)
+ +
+ +              if (dev->written || i == pd_idx || i == qd_idx)
                         set_bit(R5_UPTODATE, &dev->flags);
         }
   
@@@ -999,12 -770,12 +999,12 @@@
   }
   
   static void
- -ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ +ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
+ +                   struct dma_async_tx_descriptor *tx)
   {
- -      /* kernel stack size limits the total number of disks */
         int disks = sh->disks;
- -      struct page *xor_srcs[disks];
- -
+ +      struct page **xor_srcs = percpu->scribble;
+ +      struct async_submit_ctl submit;
         int count = 0, pd_idx = sh->pd_idx, i;
         struct page *xor_dest;
         int prexor = 0;
@@@ -1038,36 -809,18 +1038,36 @@@
          * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
          * for the synchronous xor case
          */
- -      flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+ +      flags = ASYNC_TX_ACK |
                 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
   
         atomic_inc(&sh->count);
   
- -      if (unlikely(count == 1)) {
- -              flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
- -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- -                      flags, tx, ops_complete_postxor, sh);
- -      } else
- -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- -                      flags, tx, ops_complete_postxor, sh);
+ +      init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+ +                        to_addr_conv(sh, percpu));
+ +      if (unlikely(count == 1))
+ +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ +      else
+ +              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+ +}
+ +
+ +static void
+ +ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
+ +                   struct dma_async_tx_descriptor *tx)
+ +{
+ +      struct async_submit_ctl submit;
+ +      struct page **blocks = percpu->scribble;
+ +      int count;
+ +
+ +      pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+ +
+ +      count = set_syndrome_sources(blocks, sh);
+ +
+ +      atomic_inc(&sh->count);
+ +
+ +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+ +                        sh, to_addr_conv(sh, percpu));
+ +      async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
   }
   
   static void ops_complete_check(void *stripe_head_ref)
@@@ -1082,115 -835,63 +1082,115 @@@
         release_stripe(sh);
   }
   
- -static void ops_run_check(struct stripe_head *sh)
+ +static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
   {
- -      /* kernel stack size limits the total number of disks */
         int disks = sh->disks;
- -      struct page *xor_srcs[disks];
+ +      int pd_idx = sh->pd_idx;
+ +      int qd_idx = sh->qd_idx;
+ +      struct page *xor_dest;
+ +      struct page **xor_srcs = percpu->scribble;
         struct dma_async_tx_descriptor *tx;
- -
- -      int count = 0, pd_idx = sh->pd_idx, i;
- -      struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+ +      struct async_submit_ctl submit;
+ +      int count;
+ +      int i;
   
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
   
+ +      count = 0;
+ +      xor_dest = sh->dev[pd_idx].page;
+ +      xor_srcs[count++] = xor_dest;
         for (i = disks; i--; ) {
- -              struct r5dev *dev = &sh->dev[i];
- -              if (i != pd_idx)
- -                      xor_srcs[count++] = dev->page;
+ +              if (i == pd_idx || i == qd_idx)
+ +                      continue;
+ +              xor_srcs[count++] = sh->dev[i].page;
         }
   
- -      tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- -              &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+ +      init_async_submit(&submit, 0, NULL, NULL, NULL,
+ +                        to_addr_conv(sh, percpu));
+ +      tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+ +                         &sh->ops.zero_sum_result, &submit);
   
         atomic_inc(&sh->count);
- -      tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
- -              ops_complete_check, sh);
+ +      init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+ +      tx = async_trigger_callback(&submit);
   }
   
- -static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
+ +static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
+ +{
+ +      struct page **srcs = percpu->scribble;
+ +      struct async_submit_ctl submit;
+ +      int count;
+ +
+ +      pr_debug("%s: stripe %llu checkp: %d\n", __func__,
+ +              (unsigned long long)sh->sector, checkp);
+ +
+ +      count = set_syndrome_sources(srcs, sh);
+ +      if (!checkp)
+ +              srcs[count] = NULL;
+ +
+ +      atomic_inc(&sh->count);
+ +      init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
+ +                        sh, to_addr_conv(sh, percpu));
+ +      async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+ +                         &sh->ops.zero_sum_result, percpu->spare_page, &submit);
+ +}
+ +
+ +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
   {
         int overlap_clear = 0, i, disks = sh->disks;
         struct dma_async_tx_descriptor *tx = NULL;
+ +      raid5_conf_t *conf = sh->raid_conf;
+ +      int level = conf->level;
+ +      struct raid5_percpu *percpu;
+ +      unsigned long cpu;
   
+ +      cpu = get_cpu();
+ +      percpu = per_cpu_ptr(conf->percpu, cpu);
         if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                 ops_run_biofill(sh);
                 overlap_clear++;
         }
   
         if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
- -              tx = ops_run_compute5(sh);
- -              /* terminate the chain if postxor is not set to be run */
- -              if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+ +              if (level < 6)
+ +                      tx = ops_run_compute5(sh, percpu);
+ +              else {
+ +                      if (sh->ops.target2 < 0 || sh->ops.target < 0)
+ +                              tx = ops_run_compute6_1(sh, percpu);
+ +                      else
+ +                              tx = ops_run_compute6_2(sh, percpu);
+ +              }
+ +              /* terminate the chain if reconstruct is not set to be run */
+ +              if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
                         async_tx_ack(tx);
         }
   
         if (test_bit(STRIPE_OP_PREXOR, &ops_request))
- -              tx = ops_run_prexor(sh, tx);
+ +              tx = ops_run_prexor(sh, percpu, tx);
   
         if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                 tx = ops_run_biodrain(sh, tx);
                 overlap_clear++;
         }
   
- -      if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
- -              ops_run_postxor(sh, tx);
+ +      if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+ +              if (level < 6)
+ +                      ops_run_reconstruct5(sh, percpu, tx);
+ +              else
+ +                      ops_run_reconstruct6(sh, percpu, tx);
+ +      }
   
- -      if (test_bit(STRIPE_OP_CHECK, &ops_request))
- -              ops_run_check(sh);
+ +      if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+ +              if (sh->check_state == check_state_run)
+ +                      ops_run_check_p(sh, percpu);
+ +              else if (sh->check_state == check_state_run_q)
+ +                      ops_run_check_pq(sh, percpu, 0);
+ +              else if (sh->check_state == check_state_run_pq)
+ +                      ops_run_check_pq(sh, percpu, 1);
+ +              else
+ +                      BUG();
+ +      }
   
         if (overlap_clear)
                 for (i = disks; i--; ) {
@@@ -1198,7 -899,6 +1198,7 @@@
                         if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                 wake_up(&sh->raid_conf->wait_for_overlap);
                 }
+ +      put_cpu();
   }
   
   static int grow_one_stripe(raid5_conf_t *conf)
@@@ -1248,28 -948,6 +1248,28 @@@ static int grow_stripes(raid5_conf_t *c
         return 0;
   }
   
+ +/**
+ + * scribble_len - return the required size of the scribble region
+ + * @num: total number of disks in the array
+ + *
+ + * The size must be enough to contain:
+ + * 1/ a struct page pointer for each device in the array +2
+ + * 2/ room to convert each entry in (1) to its corresponding dma
+ + *    (dma_map_page()) or page (page_address()) address.
+ + *
+ + * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ + * calculate over all devices (not just the data blocks), using zeros in place
+ + * of the P and Q blocks.
+ + */
+ +static size_t scribble_len(int num)
+ +{
+ +      size_t len;
+ +
+ +      len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+ +
+ +      return len;
+ +}
+ +
   static int resize_stripes(raid5_conf_t *conf, int newsize)
   {
         /* Make all the stripes able to hold 'newsize' devices.
@@@ -1298,7 -976,6 +1298,7 @@@
         struct stripe_head *osh, *nsh;
         LIST_HEAD(newstripes);
         struct disk_info *ndisks;
+ +      unsigned long cpu;
         int err;
         struct kmem_cache *sc;
         int i;
@@@ -1364,7 -1041,7 +1364,7 @@@
         /* Step 3.
          * At this point, we are holding all the stripes so the array
          * is completely stalled, so now is a good time to resize
- -       * conf->disks.
+ +       * conf->disks and the scribble region
          */
         ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
         if (ndisks) {
@@@ -1375,30 -1052,10 +1375,30 @@@
         } else
                 err = -ENOMEM;
   
+ +      get_online_cpus();
+ +      conf->scribble_len = scribble_len(newsize);
+ +      for_each_present_cpu(cpu) {
+ +              struct raid5_percpu *percpu;
+ +              void *scribble;
+ +
+ +              percpu = per_cpu_ptr(conf->percpu, cpu);
+ +              scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+ +
+ +              if (scribble) {
+ +                      kfree(percpu->scribble);
+ +                      percpu->scribble = scribble;
+ +              } else {
+ +                      err = -ENOMEM;
+ +                      break;
+ +              }
+ +      }
+ +      put_online_cpus();
+ +
         /* Step 4, return new stripes to service */
         while(!list_empty(&newstripes)) {
                 nsh = list_entry(newstripes.next, struct stripe_head, lru);
                 list_del_init(&nsh->lru);
+ +
                 for (i=conf->raid_disks; i < newsize; i++)
                         if (nsh->dev[i].page == NULL) {
                                 struct page *p = alloc_page(GFP_NOIO);
@@@ -1617,8 -1274,8 +1617,8 @@@ static sector_t raid5_compute_sector(ra
         sector_t new_sector;
         int algorithm = previous ? conf->prev_algo
                                  : conf->algorithm;
-       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
-                                        : (conf->chunk_size >> 9);
+       int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+                                        : conf->chunk_sectors;
         int raid_disks = previous ? conf->previous_raid_disks
                                   : conf->raid_disks;
         int data_disks = raid_disks - conf->max_degraded;
@@@ -1823,8 -1480,8 +1823,8 @@@ static sector_t compute_blocknr(struct 
         int raid_disks = sh->disks;
         int data_disks = raid_disks - conf->max_degraded;
         sector_t new_sector = sh->sector, check;
-       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
-                                        : (conf->chunk_size >> 9);
+       int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+                                        : conf->chunk_sectors;
         int algorithm = previous ? conf->prev_algo
                                  : conf->algorithm;
         sector_t stripe;
@@@ -1937,13 -1594,258 +1937,13 @@@
   }
   
   
- -
- -/*
- - * Copy data between a page in the stripe cache, and one or more bion
- - * The page could align with the middle of the bio, or there could be
- - * several bion, each with several bio_vecs, which cover part of the page
- - * Multiple bion are linked together on bi_next.  There may be extras
- - * at the end of this list.  We ignore them.
- - */
- -static void copy_data(int frombio, struct bio *bio,
- -                   struct page *page,
- -                   sector_t sector)
- -{
- -      char *pa = page_address(page);
- -      struct bio_vec *bvl;
- -      int i;
- -      int page_offset;
- -
- -      if (bio->bi_sector >= sector)
- -              page_offset = (signed)(bio->bi_sector - sector) * 512;
- -      else
- -              page_offset = (signed)(sector - bio->bi_sector) * -512;
- -      bio_for_each_segment(bvl, bio, i) {
- -              int len = bio_iovec_idx(bio,i)->bv_len;
- -              int clen;
- -              int b_offset = 0;
- -
- -              if (page_offset < 0) {
- -                      b_offset = -page_offset;
- -                      page_offset += b_offset;
- -                      len -= b_offset;
- -              }
- -
- -              if (len > 0 && page_offset + len > STRIPE_SIZE)
- -                      clen = STRIPE_SIZE - page_offset;
- -              else clen = len;
- -
- -              if (clen > 0) {
- -                      char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
- -                      if (frombio)
- -                              memcpy(pa+page_offset, ba+b_offset, clen);
- -                      else
- -                              memcpy(ba+b_offset, pa+page_offset, clen);
- -                      __bio_kunmap_atomic(ba, KM_USER0);
- -              }
- -              if (clen < len) /* hit end of page */
- -                      break;
- -              page_offset +=  len;
- -      }
- -}
- -
- -#define check_xor()   do {                                              \
- -                              if (count == MAX_XOR_BLOCKS) {            \
- -                              xor_blocks(count, STRIPE_SIZE, dest, ptr);\
- -                              count = 0;                                \
- -                         }                                              \
- -                      } while(0)
- -
- -static void compute_parity6(struct stripe_head *sh, int method)
- -{
- -      raid5_conf_t *conf = sh->raid_conf;
- -      int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
- -      int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
- -      struct bio *chosen;
- -      /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- -      void *ptrs[syndrome_disks+2];
- -
- -      pd_idx = sh->pd_idx;
- -      qd_idx = sh->qd_idx;
- -      d0_idx = raid6_d0(sh);
- -
- -      pr_debug("compute_parity, stripe %llu, method %d\n",
- -              (unsigned long long)sh->sector, method);
- -
- -      switch(method) {
- -      case READ_MODIFY_WRITE:
- -              BUG();          /* READ_MODIFY_WRITE N/A for RAID-6 */
- -      case RECONSTRUCT_WRITE:
- -              for (i= disks; i-- ;)
- -                      if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
- -                              chosen = sh->dev[i].towrite;
- -                              sh->dev[i].towrite = NULL;
- -
- -                              if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- -                                      wake_up(&conf->wait_for_overlap);
- -
- -                              BUG_ON(sh->dev[i].written);
- -                              sh->dev[i].written = chosen;
- -                      }
- -              break;
- -      case CHECK_PARITY:
- -              BUG();          /* Not implemented yet */
- -      }
- -
- -      for (i = disks; i--;)
- -              if (sh->dev[i].written) {
- -                      sector_t sector = sh->dev[i].sector;
- -                      struct bio *wbi = sh->dev[i].written;
- -                      while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
- -                              copy_data(1, wbi, sh->dev[i].page, sector);
- -                              wbi = r5_next_bio(wbi, sector);
- -                      }
- -
- -                      set_bit(R5_LOCKED, &sh->dev[i].flags);
- -                      set_bit(R5_UPTODATE, &sh->dev[i].flags);
- -              }
- -
- -      /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
- -
- -      for (i = 0; i < disks; i++)
- -              ptrs[i] = (void *)raid6_empty_zero_page;
- -
- -      count = 0;
- -      i = d0_idx;
- -      do {
- -              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
- -
- -              ptrs[slot] = page_address(sh->dev[i].page);
- -              if (slot < syndrome_disks &&
- -                  !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
- -                      printk(KERN_ERR "block %d/%d not uptodate "
- -                             "on parity calc\n", i, count);
- -                      BUG();
- -              }
- -
- -              i = raid6_next_disk(i, disks);
- -      } while (i != d0_idx);
- -      BUG_ON(count != syndrome_disks);
- -
- -      raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
- -
- -      switch(method) {
- -      case RECONSTRUCT_WRITE:
- -              set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- -              set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- -              set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
- -              set_bit(R5_LOCKED,   &sh->dev[qd_idx].flags);
- -              break;
- -      case UPDATE_PARITY:
- -              set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
- -              set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
- -              break;
- -      }
- -}
- -
- -
- -/* Compute one missing block */
- -static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
- -{
- -      int i, count, disks = sh->disks;
- -      void *ptr[MAX_XOR_BLOCKS], *dest, *p;
- -      int qd_idx = sh->qd_idx;
- -
- -      pr_debug("compute_block_1, stripe %llu, idx %d\n",
- -              (unsigned long long)sh->sector, dd_idx);
- -
- -      if ( dd_idx == qd_idx ) {
- -              /* We're actually computing the Q drive */
- -              compute_parity6(sh, UPDATE_PARITY);
- -      } else {
- -              dest = page_address(sh->dev[dd_idx].page);
- -              if (!nozero) memset(dest, 0, STRIPE_SIZE);
- -              count = 0;
- -              for (i = disks ; i--; ) {
- -                      if (i == dd_idx || i == qd_idx)
- -                              continue;
- -                      p = page_address(sh->dev[i].page);
- -                      if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
- -                              ptr[count++] = p;
- -                      else
- -                              printk("compute_block() %d, stripe %llu, %d"
- -                                     " not present\n", dd_idx,
- -                                     (unsigned long long)sh->sector, i);
- -
- -                      check_xor();
- -              }
- -              if (count)
- -                      xor_blocks(count, STRIPE_SIZE, dest, ptr);
- -              if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- -              else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- -      }
- -}
- -
- -/* Compute two missing blocks */
- -static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
- -{
- -      int i, count, disks = sh->disks;
- -      int syndrome_disks = sh->ddf_layout ? disks : disks-2;
- -      int d0_idx = raid6_d0(sh);
- -      int faila = -1, failb = -1;
- -      /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- -      void *ptrs[syndrome_disks+2];
- -
- -      for (i = 0; i < disks ; i++)
- -              ptrs[i] = (void *)raid6_empty_zero_page;
- -      count = 0;
- -      i = d0_idx;
- -      do {
- -              int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
- -
- -              ptrs[slot] = page_address(sh->dev[i].page);
- -
- -              if (i == dd_idx1)
- -                      faila = slot;
- -              if (i == dd_idx2)
- -                      failb = slot;
- -              i = raid6_next_disk(i, disks);
- -      } while (i != d0_idx);
- -      BUG_ON(count != syndrome_disks);
- -
- -      BUG_ON(faila == failb);
- -      if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
- -
- -      pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
- -               (unsigned long long)sh->sector, dd_idx1, dd_idx2,
- -               faila, failb);
- -
- -      if (failb == syndrome_disks+1) {
- -              /* Q disk is one of the missing disks */
- -              if (faila == syndrome_disks) {
- -                      /* Missing P+Q, just recompute */
- -                      compute_parity6(sh, UPDATE_PARITY);
- -                      return;
- -              } else {
- -                      /* We're missing D+Q; recompute D from P */
- -                      compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
- -                                           dd_idx2 : dd_idx1),
- -                                      0);
- -                      compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
- -                      return;
- -              }
- -      }
- -
- -      /* We're missing D+P or D+D; */
- -      if (failb == syndrome_disks) {
- -              /* We're missing D+P. */
- -              raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
- -      } else {
- -              /* We're missing D+D. */
- -              raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
- -                                ptrs);
- -      }
- -
- -      /* Both the above update both missing blocks */
- -      set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
- -      set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
- -}
- -
   static void
- -schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+ +schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                          int rcw, int expand)
   {
         int i, pd_idx = sh->pd_idx, disks = sh->disks;
+ +      raid5_conf_t *conf = sh->raid_conf;
+ +      int level = conf->level;
   
         if (rcw) {
                 /* if we are not expanding this is a proper write request, and
@@@ -1956,7 -1858,7 +1956,7 @@@
                 } else
                         sh->reconstruct_state = reconstruct_state_run;
   
- -              set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ +              set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
   
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
@@@ -1969,18 -1871,17 +1969,18 @@@
                                 s->locked++;
                         }
                 }
- -              if (s->locked + 1 == disks)
+ +              if (s->locked + conf->max_degraded == disks)
                         if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- -                              atomic_inc(&sh->raid_conf->pending_full_writes);
+ +                              atomic_inc(&conf->pending_full_writes);
         } else {
+ +              BUG_ON(level == 6);
                 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                         test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
   
                 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
                 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
                 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
- -              set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+ +              set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
   
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
@@@ -1998,22 -1899,13 +1998,22 @@@
                 }
         }
   
- -      /* keep the parity disk locked while asynchronous operations
+ +      /* keep the parity disk(s) locked while asynchronous operations
          * are in flight
          */
         set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
         clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
         s->locked++;
   
+ +      if (level == 6) {
+ +              int qd_idx = sh->qd_idx;
+ +              struct r5dev *dev = &sh->dev[qd_idx];
+ +
+ +              set_bit(R5_LOCKED, &dev->flags);
+ +              clear_bit(R5_UPTODATE, &dev->flags);
+ +              s->locked++;
+ +      }
+ +
         pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                 __func__, (unsigned long long)sh->sector,
                 s->locked, s->ops_request);
@@@ -2094,12 -1986,18 +2094,11 @@@ static int add_stripe_bio(struct stripe
   
   static void end_reshape(raid5_conf_t *conf);
   
- -static int page_is_zero(struct page *p)
- -{
- -      char *a = page_address(p);
- -      return ((*(u32*)a) == 0 &&
- -              memcmp(a, a+4, STRIPE_SIZE-4)==0);
- -}
- -
   static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                             struct stripe_head *sh)
   {
         int sectors_per_chunk =
-               previous ? (conf->prev_chunk >> 9)
-                        : (conf->chunk_size >> 9);
+               previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
         int dd_idx;
         int chunk_offset = sector_div(stripe, sectors_per_chunk);
         int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
@@@ -2234,10 -2132,9 +2233,10 @@@ static int fetch_block5(struct stripe_h
                         set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                         set_bit(R5_Wantcompute, &dev->flags);
                         sh->ops.target = disk_idx;
+ +                      sh->ops.target2 = -1;
                         s->req_compute = 1;
                         /* Careful: from this point on 'uptodate' is in the eye
- -                       * of raid5_run_ops which services 'compute' operations
+ +                       * of raid_run_ops which services 'compute' operations
                          * before writes. R5_Wantcompute flags a block that will
                          * be R5_UPTODATE by the time it is needed for a
                          * subsequent operation.
@@@ -2276,104 -2173,61 +2275,104 @@@ static void handle_stripe_fill5(struct 
         set_bit(STRIPE_HANDLE, &sh->state);
   }
   
- -static void handle_stripe_fill6(struct stripe_head *sh,
- -                      struct stripe_head_state *s, struct r6_state *r6s,
- -                      int disks)
+ +/* fetch_block6 - checks the given member device to see if its data needs
+ + * to be read or computed to satisfy a request.
+ + *
+ + * Returns 1 when no more member devices need to be checked, otherwise returns
+ + * 0 to tell the loop in handle_stripe_fill6 to continue
+ + */
+ +static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+ +                       struct r6_state *r6s, int disk_idx, int disks)
   {
- -      int i;
- -      for (i = disks; i--; ) {
- -              struct r5dev *dev = &sh->dev[i];
- -              if (!test_bit(R5_LOCKED, &dev->flags) &&
- -                  !test_bit(R5_UPTODATE, &dev->flags) &&
- -                  (dev->toread || (dev->towrite &&
- -                   !test_bit(R5_OVERWRITE, &dev->flags)) ||
- -                   s->syncing || s->expanding ||
- -                   (s->failed >= 1 &&
- -                    (sh->dev[r6s->failed_num[0]].toread ||
- -                     s->to_write)) ||
- -                   (s->failed >= 2 &&
- -                    (sh->dev[r6s->failed_num[1]].toread ||
- -                     s->to_write)))) {
- -                      /* we would like to get this block, possibly
- -                       * by computing it, but we might not be able to
+ +      struct r5dev *dev = &sh->dev[disk_idx];
+ +      struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+ +                                &sh->dev[r6s->failed_num[1]] };
+ +
+ +      if (!test_bit(R5_LOCKED, &dev->flags) &&
+ +          !test_bit(R5_UPTODATE, &dev->flags) &&
+ +          (dev->toread ||
+ +           (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ +           s->syncing || s->expanding ||
+ +           (s->failed >= 1 &&
+ +            (fdev[0]->toread || s->to_write)) ||
+ +           (s->failed >= 2 &&
+ +            (fdev[1]->toread || s->to_write)))) {
+ +              /* we would like to get this block, possibly by computing it,
+ +               * otherwise read it if the backing disk is insync
+ +               */
+ +              BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+ +              BUG_ON(test_bit(R5_Wantread, &dev->flags));
+ +              if ((s->uptodate == disks - 1) &&
+ +                  (s->failed && (disk_idx == r6s->failed_num[0] ||
+ +                                 disk_idx == r6s->failed_num[1]))) {
+ +                      /* this disk has failed and we have been asked to
+ +                       * fetch it, so schedule a compute for it
                          */
- -                      if ((s->uptodate == disks - 1) &&
- -                          (s->failed && (i == r6s->failed_num[0] ||
- -                                         i == r6s->failed_num[1]))) {
- -                              pr_debug("Computing stripe %llu block %d\n",
- -                                     (unsigned long long)sh->sector, i);
- -                              compute_block_1(sh, i, 0);
- -                              s->uptodate++;
- -                      } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
- -                              /* Computing 2-failure is *very* expensive; only
- -                               * do it if failed >= 2
- -                               */
- -                              int other;
- -                              for (other = disks; other--; ) {
- -                                      if (other == i)
- -                                              continue;
- -                                      if (!test_bit(R5_UPTODATE,
- -                                            &sh->dev[other].flags))
- -                                              break;
- -                              }
- -                              BUG_ON(other < 0);
- -                              pr_debug("Computing stripe %llu blocks %d,%d\n",
- -                                     (unsigned long long)sh->sector,
- -                                     i, other);
- -                              compute_block_2(sh, i, other);
- -                              s->uptodate += 2;
- -                      } else if (test_bit(R5_Insync, &dev->flags)) {
- -                              set_bit(R5_LOCKED, &dev->flags);
- -                              set_bit(R5_Wantread, &dev->flags);
- -                              s->locked++;
- -                              pr_debug("Reading block %d (sync=%d)\n",
- -                                      i, s->syncing);
+ +                      pr_debug("Computing stripe %llu block %d\n",
+ +                             (unsigned long long)sh->sector, disk_idx);
+ +                      set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ +                      set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ +                      set_bit(R5_Wantcompute, &dev->flags);
+ +                      sh->ops.target = disk_idx;
+ +                      sh->ops.target2 = -1; /* no 2nd target */
+ +                      s->req_compute = 1;
+ +                      s->uptodate++;
+ +                      return 1;
+ +              } else if (s->uptodate == disks-2 && s->failed >= 2) {
+ +                      /* Computing 2-failure is *very* expensive; only
+ +                       * do it if failed >= 2
+ +                       */
+ +                      int other;
+ +                      for (other = disks; other--; ) {
+ +                              if (other == disk_idx)
+ +                                      continue;
+ +                              if (!test_bit(R5_UPTODATE,
+ +                                    &sh->dev[other].flags))
+ +                                      break;
                         }
+ +                      BUG_ON(other < 0);
+ +                      pr_debug("Computing stripe %llu blocks %d,%d\n",
+ +                             (unsigned long long)sh->sector,
+ +                             disk_idx, other);
+ +                      set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ +                      set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ +                      set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+ +                      set_bit(R5_Wantcompute, &sh->dev[other].flags);
+ +                      sh->ops.target = disk_idx;
+ +                      sh->ops.target2 = other;
+ +                      s->uptodate += 2;
+ +                      s->req_compute = 1;
+ +                      return 1;
+ +              } else if (test_bit(R5_Insync, &dev->flags)) {
+ +                      set_bit(R5_LOCKED, &dev->flags);
+ +                      set_bit(R5_Wantread, &dev->flags);
+ +                      s->locked++;
+ +                      pr_debug("Reading block %d (sync=%d)\n",
+ +                              disk_idx, s->syncing);
                 }
         }
+ +
+ +      return 0;
+ +}
+ +
+ +/**
+ + * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ + */
+ +static void handle_stripe_fill6(struct stripe_head *sh,
+ +                      struct stripe_head_state *s, struct r6_state *r6s,
+ +                      int disks)
+ +{
+ +      int i;
+ +
+ +      /* look for blocks to read/compute, skip this if a compute
+ +       * is already in flight, or if the stripe contents are in the
+ +       * midst of changing due to a write
+ +       */
+ +      if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+ +          !sh->reconstruct_state)
+ +              for (i = disks; i--; )
+ +                      if (fetch_block6(sh, s, r6s, i, disks))
+ +                              break;
         set_bit(STRIPE_HANDLE, &sh->state);
   }
   
@@@ -2507,61 -2361,114 +2506,61 @@@ static void handle_stripe_dirtying5(rai
          */
         /* since handle_stripe can be called at any time we need to handle the
          * case where a compute block operation has been submitted and then a
- -       * subsequent call wants to start a write request.  raid5_run_ops only
- -       * handles the case where compute block and postxor are requested
+ +       * subsequent call wants to start a write request.  raid_run_ops only
+ +       * handles the case where compute block and reconstruct are requested
          * simultaneously.  If this is not the case then new writes need to be
          * held off until the compute completes.
          */
         if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
             (s->locked == 0 && (rcw == 0 || rmw == 0) &&
             !test_bit(STRIPE_BIT_DELAY, &sh->state)))
- -              schedule_reconstruction5(sh, s, rcw == 0, 0);
+ +              schedule_reconstruction(sh, s, rcw == 0, 0);
   }
   
   static void handle_stripe_dirtying6(raid5_conf_t *conf,
                 struct stripe_head *sh, struct stripe_head_state *s,
                 struct r6_state *r6s, int disks)
   {
- -      int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
+ +      int rcw = 0, pd_idx = sh->pd_idx, i;
         int qd_idx = sh->qd_idx;
+ +
+ +      set_bit(STRIPE_HANDLE, &sh->state);
         for (i = disks; i--; ) {
                 struct r5dev *dev = &sh->dev[i];
- -              /* Would I have to read this buffer for reconstruct_write */
- -              if (!test_bit(R5_OVERWRITE, &dev->flags)
- -                  && i != pd_idx && i != qd_idx
- -                  && (!test_bit(R5_LOCKED, &dev->flags)
- -                          ) &&
- -                  !test_bit(R5_UPTODATE, &dev->flags)) {
- -                      if (test_bit(R5_Insync, &dev->flags)) rcw++;
- -                      else {
- -                              pr_debug("raid6: must_compute: "
- -                                      "disk %d flags=%#lx\n", i, dev->flags);
- -                              must_compute++;
- -                      }
- -              }
- -      }
- -      pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
- -             (unsigned long long)sh->sector, rcw, must_compute);
- -      set_bit(STRIPE_HANDLE, &sh->state);
- -
- -      if (rcw > 0)
- -              /* want reconstruct write, but need to get some data */
- -              for (i = disks; i--; ) {
- -                      struct r5dev *dev = &sh->dev[i];
- -                      if (!test_bit(R5_OVERWRITE, &dev->flags)
- -                          && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
- -                          && !test_bit(R5_LOCKED, &dev->flags) &&
- -                          !test_bit(R5_UPTODATE, &dev->flags) &&
- -                          test_bit(R5_Insync, &dev->flags)) {
- -                              if (
- -                                test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- -                                      pr_debug("Read_old stripe %llu "
- -                                              "block %d for Reconstruct\n",
- -                                           (unsigned long long)sh->sector, i);
- -                                      set_bit(R5_LOCKED, &dev->flags);
- -                                      set_bit(R5_Wantread, &dev->flags);
- -                                      s->locked++;
- -                              } else {
- -                                      pr_debug("Request delayed stripe %llu "
- -                                              "block %d for Reconstruct\n",
- -                                           (unsigned long long)sh->sector, i);
- -                                      set_bit(STRIPE_DELAYED, &sh->state);
- -                                      set_bit(STRIPE_HANDLE, &sh->state);
- -                              }
+ +              /* check if we don't have enough data */
+ +              if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+ +                  i != pd_idx && i != qd_idx &&
+ +                  !test_bit(R5_LOCKED, &dev->flags) &&
+ +                  !(test_bit(R5_UPTODATE, &dev->flags) ||
+ +                    test_bit(R5_Wantcompute, &dev->flags))) {
+ +                      rcw++;
+ +                      if (!test_bit(R5_Insync, &dev->flags))
+ +                              continue; /* it's a failed drive */
+ +
+ +                      if (
+ +                        test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ +                              pr_debug("Read_old stripe %llu "
+ +                                      "block %d for Reconstruct\n",
+ +                                   (unsigned long long)sh->sector, i);
+ +                              set_bit(R5_LOCKED, &dev->flags);
+ +                              set_bit(R5_Wantread, &dev->flags);
+ +                              s->locked++;
+ +                      } else {
+ +                              pr_debug("Request delayed stripe %llu "
+ +                                      "block %d for Reconstruct\n",
+ +                                   (unsigned long long)sh->sector, i);
+ +                              set_bit(STRIPE_DELAYED, &sh->state);
+ +                              set_bit(STRIPE_HANDLE, &sh->state);
                         }
                 }
+ +      }
         /* now if nothing is locked, and if we have enough data, we can start a
          * write request
          */
- -      if (s->locked == 0 && rcw == 0 &&
+ +      if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+ +          s->locked == 0 && rcw == 0 &&
             !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- -              if (must_compute > 0) {
- -                      /* We have failed blocks and need to compute them */
- -                      switch (s->failed) {
- -                      case 0:
- -                              BUG();
- -                      case 1:
- -                              compute_block_1(sh, r6s->failed_num[0], 0);
- -                              break;
- -                      case 2:
- -                              compute_block_2(sh, r6s->failed_num[0],
- -                                              r6s->failed_num[1]);
- -                              break;
- -                      default: /* This request should have been failed? */
- -                              BUG();
- -                      }
- -              }
- -
- -              pr_debug("Computing parity for stripe %llu\n",
- -                      (unsigned long long)sh->sector);
- -              compute_parity6(sh, RECONSTRUCT_WRITE);
- -              /* now every locked buffer is ready to be written */
- -              for (i = disks; i--; )
- -                      if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- -                              pr_debug("Writing stripe %llu block %d\n",
- -                                     (unsigned long long)sh->sector, i);
- -                              s->locked++;
- -                              set_bit(R5_Wantwrite, &sh->dev[i].flags);
- -                      }
- -              if (s->locked == disks)
- -                      if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
- -                              atomic_inc(&conf->pending_full_writes);
- -              /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
- -              set_bit(STRIPE_INSYNC, &sh->state);
- -
- -              if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- -                      atomic_dec(&conf->preread_active_stripes);
- -                      if (atomic_read(&conf->preread_active_stripes) <
- -                          IO_THRESHOLD)
- -                              md_wakeup_thread(conf->mddev->thread);
- -              }
+ +              schedule_reconstruction(sh, s, 1, 0);
         }
   }
   
@@@ -2620,7 -2527,7 +2619,7 @@@ static void handle_parity_checks5(raid5
                  * we are done.  Otherwise update the mismatch count and repair
                  * parity if !MD_RECOVERY_CHECK
                  */
- -              if (sh->ops.zero_sum_result == 0)
+ +              if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
                         /* parity is correct (on disc,
                          * not in buffer any more)
                          */
@@@ -2637,7 -2544,6 +2636,7 @@@
                                 set_bit(R5_Wantcompute,
                                         &sh->dev[sh->pd_idx].flags);
                                 sh->ops.target = sh->pd_idx;
+ +                              sh->ops.target2 = -1;
                                 s->uptodate++;
                         }
                 }
@@@ -2654,74 -2560,67 +2653,74 @@@
   
   
   static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
- -                              struct stripe_head_state *s,
- -                              struct r6_state *r6s, struct page *tmp_page,
- -                              int disks)
+ +                                struct stripe_head_state *s,
+ +                                struct r6_state *r6s, int disks)
   {
- -      int update_p = 0, update_q = 0;
- -      struct r5dev *dev;
         int pd_idx = sh->pd_idx;
         int qd_idx = sh->qd_idx;
+ +      struct r5dev *dev;
   
         set_bit(STRIPE_HANDLE, &sh->state);
   
         BUG_ON(s->failed > 2);
- -      BUG_ON(s->uptodate < disks);
+ +
         /* Want to check and possibly repair P and Q.
          * However there could be one 'failed' device, in which
          * case we can only check one of them, possibly using the
          * other to generate missing data
          */
   
- -      /* If !tmp_page, we cannot do the calculations,
- -       * but as we have set STRIPE_HANDLE, we will soon be called
- -       * by stripe_handle with a tmp_page - just wait until then.
- -       */
- -      if (tmp_page) {
+ +      switch (sh->check_state) {
+ +      case check_state_idle:
+ +              /* start a new check operation if there are < 2 failures */
                 if (s->failed == r6s->q_failed) {
- -                      /* The only possible failed device holds 'Q', so it
+ +                      /* The only possible failed device holds Q, so it
                          * makes sense to check P (If anything else were failed,
                          * we would have used P to recreate it).
                          */
- -                      compute_block_1(sh, pd_idx, 1);
- -                      if (!page_is_zero(sh->dev[pd_idx].page)) {
- -                              compute_block_1(sh, pd_idx, 0);
- -                              update_p = 1;
- -                      }
+ +                      sh->check_state = check_state_run;
                 }
                 if (!r6s->q_failed && s->failed < 2) {
- -                      /* q is not failed, and we didn't use it to generate
+ +                      /* Q is not failed, and we didn't use it to generate
                          * anything, so it makes sense to check it
                          */
- -                      memcpy(page_address(tmp_page),
- -                             page_address(sh->dev[qd_idx].page),
- -                             STRIPE_SIZE);
- -                      compute_parity6(sh, UPDATE_PARITY);
- -                      if (memcmp(page_address(tmp_page),
- -                                 page_address(sh->dev[qd_idx].page),
- -                                 STRIPE_SIZE) != 0) {
- -                              clear_bit(STRIPE_INSYNC, &sh->state);
- -                              update_q = 1;
- -                      }
+ +                      if (sh->check_state == check_state_run)
+ +                              sh->check_state = check_state_run_pq;
+ +                      else
+ +                              sh->check_state = check_state_run_q;
                 }
- -              if (update_p || update_q) {
- -                      conf->mddev->resync_mismatches += STRIPE_SECTORS;
- -                      if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
- -                              /* don't try to repair!! */
- -                              update_p = update_q = 0;
+ +
+ +              /* discard potentially stale zero_sum_result */
+ +              sh->ops.zero_sum_result = 0;
+ +
+ +              if (sh->check_state == check_state_run) {
+ +                      /* async_xor_zero_sum destroys the contents of P */
+ +                      clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ +                      s->uptodate--;
+ +              }
+ +              if (sh->check_state >= check_state_run &&
+ +                  sh->check_state <= check_state_run_pq) {
+ +                      /* async_syndrome_zero_sum preserves P and Q, so
+ +                       * no need to mark them !uptodate here
+ +                       */
+ +                      set_bit(STRIPE_OP_CHECK, &s->ops_request);
+ +                      break;
                 }
   
+ +              /* we have a 2-disk failure */
+ +              BUG_ON(s->failed != 2);
+ +              /* fall through */
+ +      case check_state_compute_result:
+ +              sh->check_state = check_state_idle;
+ +
+ +              /* check that a write has not made the stripe insync */
+ +              if (test_bit(STRIPE_INSYNC, &sh->state))
+ +                      break;
+ +
                 /* now write out any block on a failed drive,
- -               * or P or Q if they need it
+ +               * or P or Q if they were recomputed
                  */
- -
+ +              BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
                 if (s->failed == 2) {
                         dev = &sh->dev[r6s->failed_num[1]];
                         s->locked++;
@@@ -2734,13 -2633,14 +2733,13 @@@
                         set_bit(R5_LOCKED, &dev->flags);
                         set_bit(R5_Wantwrite, &dev->flags);
                 }
- -
- -              if (update_p) {
+ +              if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
                         dev = &sh->dev[pd_idx];
                         s->locked++;
                         set_bit(R5_LOCKED, &dev->flags);
                         set_bit(R5_Wantwrite, &dev->flags);
                 }
- -              if (update_q) {
+ +              if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
                         dev = &sh->dev[qd_idx];
                         s->locked++;
                         set_bit(R5_LOCKED, &dev->flags);
@@@ -2749,70 -2649,6 +2748,70 @@@
                 clear_bit(STRIPE_DEGRADED, &sh->state);
   
                 set_bit(STRIPE_INSYNC, &sh->state);
+ +              break;
+ +      case check_state_run:
+ +      case check_state_run_q:
+ +      case check_state_run_pq:
+ +              break; /* we will be called again upon completion */
+ +      case check_state_check_result:
+ +              sh->check_state = check_state_idle;
+ +
+ +              /* handle a successful check operation, if parity is correct
+ +               * we are done.  Otherwise update the mismatch count and repair
+ +               * parity if !MD_RECOVERY_CHECK
+ +               */
+ +              if (sh->ops.zero_sum_result == 0) {
+ +                      /* both parities are correct */
+ +                      if (!s->failed)
+ +                              set_bit(STRIPE_INSYNC, &sh->state);
+ +                      else {
+ +                              /* in contrast to the raid5 case we can validate
+ +                               * parity, but still have a failure to write
+ +                               * back
+ +                               */
+ +                              sh->check_state = check_state_compute_result;
+ +                              /* Returning at this point means that we may go
+ +                               * off and bring p and/or q uptodate again so
+ +                               * we make sure to check zero_sum_result again
+ +                               * to verify if p or q need writeback
+ +                               */
+ +                      }
+ +              } else {
+ +                      conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ +                      if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ +                              /* don't try to repair!! */
+ +                              set_bit(STRIPE_INSYNC, &sh->state);
+ +                      else {
+ +                              int *target = &sh->ops.target;
+ +
+ +                              sh->ops.target = -1;
+ +                              sh->ops.target2 = -1;
+ +                              sh->check_state = check_state_compute_run;
+ +                              set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ +                              set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ +                              if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+ +                                      set_bit(R5_Wantcompute,
+ +                                              &sh->dev[pd_idx].flags);
+ +                                      *target = pd_idx;
+ +                                      target = &sh->ops.target2;
+ +                                      s->uptodate++;
+ +                              }
+ +                              if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+ +                                      set_bit(R5_Wantcompute,
+ +                                              &sh->dev[qd_idx].flags);
+ +                                      *target = qd_idx;
+ +                                      s->uptodate++;
+ +                              }
+ +                      }
+ +              }
+ +              break;
+ +      case check_state_compute_run:
+ +              break;
+ +      default:
+ +              printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+ +                     __func__, sh->check_state,
+ +                     (unsigned long long) sh->sector);
+ +              BUG();
         }
   }
   
@@@ -2830,7 -2666,6 +2829,7 @@@ static void handle_stripe_expansion(rai
                 if (i != sh->pd_idx && i != sh->qd_idx) {
                         int dd_idx, j;
                         struct stripe_head *sh2;
+ +                      struct async_submit_ctl submit;
   
                         sector_t bn = compute_blocknr(sh, i, 1);
                         sector_t s = raid5_compute_sector(conf, bn, 0,
@@@ -2850,10 -2685,9 +2849,10 @@@
                         }
   
                         /* place all the copies on one channel */
+ +                      init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
                         tx = async_memcpy(sh2->dev[dd_idx].page,
- -                              sh->dev[i].page, 0, 0, STRIPE_SIZE,
- -                              ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+ +                                        sh->dev[i].page, 0, 0, STRIPE_SIZE,
+ +                                        &submit);
   
                         set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
                         set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@@ -3139,7 -2973,7 +3138,7 @@@ static bool handle_stripe5(struct strip
                 /* Need to write out all blocks after computing parity */
                 sh->disks = conf->raid_disks;
                 stripe_set_idx(sh->sector, conf, 0, sh);
- -              schedule_reconstruction5(sh, &s, 1, 1);
+ +              schedule_reconstruction(sh, &s, 1, 1);
         } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                 clear_bit(STRIPE_EXPAND_READY, &sh->state);
                 atomic_dec(&conf->reshape_stripes);
@@@ -3159,7 -2993,7 +3158,7 @@@
                 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
   
         if (s.ops_request)
- -              raid5_run_ops(sh, s.ops_request);
+ +              raid_run_ops(sh, s.ops_request);
   
         ops_run_io(sh, &s);
   
@@@ -3168,7 -3002,7 +3167,7 @@@
         return blocked_rdev == NULL;
   }
   
- -static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+ +static bool handle_stripe6(struct stripe_head *sh)
   {
         raid5_conf_t *conf = sh->raid_conf;
         int disks = sh->disks;
@@@ -3180,10 -3014,9 +3179,10 @@@
         mdk_rdev_t *blocked_rdev = NULL;
   
         pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
- -              "pd_idx=%d, qd_idx=%d\n",
+ +              "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
                (unsigned long long)sh->sector, sh->state,
- -             atomic_read(&sh->count), pd_idx, qd_idx);
+ +             atomic_read(&sh->count), pd_idx, qd_idx,
+ +             sh->check_state, sh->reconstruct_state);
         memset(&s, 0, sizeof(s));
   
         spin_lock(&sh->lock);
@@@ -3203,24 -3036,35 +3202,24 @@@
   
                 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
                         i, dev->flags, dev->toread, dev->towrite, dev->written);
- -              /* maybe we can reply to a read */
- -              if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
- -                      struct bio *rbi, *rbi2;
- -                      pr_debug("Return read for disc %d\n", i);
- -                      spin_lock_irq(&conf->device_lock);
- -                      rbi = dev->toread;
- -                      dev->toread = NULL;
- -                      if (test_and_clear_bit(R5_Overlap, &dev->flags))
- -                              wake_up(&conf->wait_for_overlap);
- -                      spin_unlock_irq(&conf->device_lock);
- -                      while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
- -                              copy_data(0, rbi, dev->page, dev->sector);
- -                              rbi2 = r5_next_bio(rbi, dev->sector);
- -                              spin_lock_irq(&conf->device_lock);
- -                              if (!raid5_dec_bi_phys_segments(rbi)) {
- -                                      rbi->bi_next = return_bi;
- -                                      return_bi = rbi;
- -                              }
- -                              spin_unlock_irq(&conf->device_lock);
- -                              rbi = rbi2;
- -                      }
- -              }
+ +              /* maybe we can reply to a read
+ +               *
+ +               * new wantfill requests are only permitted while
+ +               * ops_complete_biofill is guaranteed to be inactive
+ +               */
+ +              if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+ +                  !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+ +                      set_bit(R5_Wantfill, &dev->flags);
   
                 /* now count some things */
                 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
                 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+ +              if (test_bit(R5_Wantcompute, &dev->flags))
+ +                      BUG_ON(++s.compute > 2);
   
- -
- -              if (dev->toread)
+ +              if (test_bit(R5_Wantfill, &dev->flags)) {
+ +                      s.to_fill++;
+ +              } else if (dev->toread)
                         s.to_read++;
                 if (dev->towrite) {
                         s.to_write++;
@@@ -3261,11 -3105,6 +3260,11 @@@
                 blocked_rdev = NULL;
         }
   
+ +      if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+ +              set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+ +              set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+ +      }
+ +
         pr_debug("locked=%d uptodate=%d to_read=%d"
                " to_write=%d failed=%d failed_num=%d,%d\n",
                s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@@ -3306,62 -3145,19 +3305,62 @@@
          * or to load a block that is being partially written.
          */
         if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
- -          (s.syncing && (s.uptodate < disks)) || s.expanding)
+ +          (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
                 handle_stripe_fill6(sh, &s, &r6s, disks);
   
- -      /* now to consider writing and what else, if anything should be read */
- -      if (s.to_write)
+ +      /* Now we check to see if any write operations have recently
+ +       * completed
+ +       */
+ +      if (sh->reconstruct_state == reconstruct_state_drain_result) {
+ +              int qd_idx = sh->qd_idx;
+ +
+ +              sh->reconstruct_state = reconstruct_state_idle;
+ +              /* All the 'written' buffers and the parity blocks are ready to
+ +               * be written back to disk
+ +               */
+ +              BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ +              BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+ +              for (i = disks; i--; ) {
+ +                      dev = &sh->dev[i];
+ +                      if (test_bit(R5_LOCKED, &dev->flags) &&
+ +                          (i == sh->pd_idx || i == qd_idx ||
+ +                           dev->written)) {
+ +                              pr_debug("Writing block %d\n", i);
+ +                              BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+ +                              set_bit(R5_Wantwrite, &dev->flags);
+ +                              if (!test_bit(R5_Insync, &dev->flags) ||
+ +                                  ((i == sh->pd_idx || i == qd_idx) &&
+ +                                    s.failed == 0))
+ +                                      set_bit(STRIPE_INSYNC, &sh->state);
+ +                      }
+ +              }
+ +              if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ +                      atomic_dec(&conf->preread_active_stripes);
+ +                      if (atomic_read(&conf->preread_active_stripes) <
+ +                              IO_THRESHOLD)
+ +                              md_wakeup_thread(conf->mddev->thread);
+ +              }
+ +      }
+ +
+ +      /* Now to consider new write requests and what else, if anything
+ +       * should be read.  We do not handle new writes when:
+ +       * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+ +       * 2/ A 'check' operation is in flight, as it may clobber the parity
+ +       *    block.
+ +       */
+ +      if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
   
         /* maybe we need to check and possibly fix the parity for this stripe
          * Any reads will already have been scheduled, so we just see if enough
- -       * data is available
+ +       * data is available.  The parity check is held off while parity
+ +       * dependent operations are in flight.
          */
- -      if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
- -              handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+ +      if (sh->check_state ||
+ +          (s.syncing && s.locked == 0 &&
+ +           !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+ +           !test_bit(STRIPE_INSYNC, &sh->state)))
+ +              handle_parity_checks6(conf, sh, &s, &r6s, disks);
   
         if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
                 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@@ -3382,29 -3178,15 +3381,29 @@@
                                         set_bit(R5_Wantwrite, &dev->flags);
                                         set_bit(R5_ReWrite, &dev->flags);
                                         set_bit(R5_LOCKED, &dev->flags);
+ +                                      s.locked++;
                                 } else {
                                         /* let's read it back */
                                         set_bit(R5_Wantread, &dev->flags);
                                         set_bit(R5_LOCKED, &dev->flags);
+ +                                      s.locked++;
                                 }
                         }
                 }
   
- -      if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ +      /* Finish reconstruct operations initiated by the expansion process */
+ +      if (sh->reconstruct_state == reconstruct_state_result) {
+ +              sh->reconstruct_state = reconstruct_state_idle;
+ +              clear_bit(STRIPE_EXPANDING, &sh->state);
+ +              for (i = conf->raid_disks; i--; ) {
+ +                      set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ +                      set_bit(R5_LOCKED, &sh->dev[i].flags);
+ +                      s.locked++;
+ +              }
+ +      }
+ +
+ +      if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+ +          !sh->reconstruct_state) {
                 struct stripe_head *sh2
                         = get_active_stripe(conf, sh->sector, 1, 1, 1);
                 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@@ -3425,8 -3207,14 +3424,8 @@@
                 /* Need to write out all blocks after computing P&Q */
                 sh->disks = conf->raid_disks;
                 stripe_set_idx(sh->sector, conf, 0, sh);
- -              compute_parity6(sh, RECONSTRUCT_WRITE);
- -              for (i = conf->raid_disks ; i-- ;  ) {
- -                      set_bit(R5_LOCKED, &sh->dev[i].flags);
- -                      s.locked++;
- -                      set_bit(R5_Wantwrite, &sh->dev[i].flags);
- -              }
- -              clear_bit(STRIPE_EXPANDING, &sh->state);
- -      } else if (s.expanded) {
+ +              schedule_reconstruction(sh, &s, 1, 1);
+ +      } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                 clear_bit(STRIPE_EXPAND_READY, &sh->state);
                 atomic_dec(&conf->reshape_stripes);
                 wake_up(&conf->wait_for_overlap);
@@@ -3444,9 -3232,6 +3443,9 @@@
         if (unlikely(blocked_rdev))
                 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
   
+ +      if (s.ops_request)
+ +              raid_run_ops(sh, s.ops_request);
+ +
         ops_run_io(sh, &s);
   
         return_io(return_bi);
@@@ -3455,14 -3240,16 +3454,14 @@@
   }
   
   /* returns true if the stripe was handled */
- -static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+ +static bool handle_stripe(struct stripe_head *sh)
   {
         if (sh->raid_conf->level == 6)
- -              return handle_stripe6(sh, tmp_page);
+ +              return handle_stripe6(sh);
         else
                 return handle_stripe5(sh);
   }
   
- -
- -
   static void raid5_activate_delayed(raid5_conf_t *conf)
   {
         if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@@ -3496,7 -3283,7 +3495,7 @@@ static void activate_bit_delay(raid5_co
   
   static void unplug_slaves(mddev_t *mddev)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         int i;
   
         rcu_read_lock();
@@@ -3520,7 -3307,7 +3519,7 @@@
   static void raid5_unplug_device(struct request_queue *q)
   {
         mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         unsigned long flags;
   
         spin_lock_irqsave(&conf->device_lock, flags);
@@@ -3539,7 -3326,7 +3538,7 @@@
   static int raid5_congested(void *data, int bits)
   {
         mddev_t *mddev = data;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
   
         /* No difference between reads and writes.  Just check
          * how busy the stripe_cache is
@@@ -3564,14 -3351,14 +3563,14 @@@ static int raid5_mergeable_bvec(struct 
         mddev_t *mddev = q->queuedata;
         sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
         int max;
-       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int chunk_sectors = mddev->chunk_sectors;
         unsigned int bio_sectors = bvm->bi_size >> 9;
   
         if ((bvm->bi_rw & 1) == WRITE)
                 return biovec->bv_len; /* always allow writes to be mergeable */
   
-       if (mddev->new_chunk < mddev->chunk_size)
-               chunk_sectors = mddev->new_chunk >> 9;
+       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+               chunk_sectors = mddev->new_chunk_sectors;
         max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
         if (max < 0) max = 0;
         if (max <= biovec->bv_len && bio_sectors == 0)
@@@ -3584,11 -3371,11 +3583,11 @@@
   static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
   {
         sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
-       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int chunk_sectors = mddev->chunk_sectors;
         unsigned int bio_sectors = bio->bi_size >> 9;
   
-       if (mddev->new_chunk < mddev->chunk_size)
-               chunk_sectors = mddev->new_chunk >> 9;
+       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+               chunk_sectors = mddev->new_chunk_sectors;
         return  chunk_sectors >=
                 ((sector & (chunk_sectors - 1)) + bio_sectors);
   }
@@@ -3652,7 -3439,7 +3651,7 @@@ static void raid5_align_endio(struct bi
         bio_put(bi);
   
         mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
-       conf = mddev_to_conf(mddev);
+       conf = mddev->private;
         rdev = (void*)raid_bi->bi_next;
         raid_bi->bi_next = NULL;
   
@@@ -3675,10 -3462,10 +3674,10 @@@ static int bio_fits_rdev(struct bio *bi
   {
         struct request_queue *q = bdev_get_queue(bi->bi_bdev);
   
-       if ((bi->bi_size>>9) > q->max_sectors)
+       if ((bi->bi_size>>9) > queue_max_sectors(q))
                 return 0;
         blk_recount_segments(q, bi);
-       if (bi->bi_phys_segments > q->max_phys_segments)
+       if (bi->bi_phys_segments > queue_max_phys_segments(q))
                 return 0;
   
         if (q->merge_bvec_fn)
@@@ -3694,7 -3481,7 +3693,7 @@@
   static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
   {
         mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         unsigned int dd_idx;
         struct bio* align_bi;
         mdk_rdev_t *rdev;
@@@ -3811,7 -3598,7 +3810,7 @@@ static struct stripe_head *__get_priori
   static int make_request(struct request_queue *q, struct bio * bi)
   {
         mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         int dd_idx;
         sector_t new_sector;
         sector_t logical_sector, last_sector;
@@@ -3908,6 -3695,7 +3907,7 @@@
                                 spin_unlock_irq(&conf->device_lock);
                                 if (must_retry) {
                                         release_stripe(sh);
+                                       schedule();
                                         goto retry;
                                 }
                         }
@@@ -4003,10 -3791,10 +4003,10 @@@ static sector_t reshape_request(mddev_
          * If old and new chunk sizes differ, we need to process the
          * largest of these
          */
-       if (mddev->new_chunk > mddev->chunk_size)
-               reshape_sectors = mddev->new_chunk / 512;
+       if (mddev->new_chunk_sectors > mddev->chunk_sectors)
+               reshape_sectors = mddev->new_chunk_sectors;
         else
-               reshape_sectors = mddev->chunk_size / 512;
+               reshape_sectors = mddev->chunk_sectors;
   
         /* we update the metadata when there is more than 3Meg
          * in the block range (that is rather arbitrary, should
@@@ -4129,7 -3917,7 +4129,7 @@@
                                      1, &dd_idx, NULL);
         last_sector =
                 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
-                                           *(new_data_disks) - 1),
+                                           * new_data_disks - 1),
                                      1, &dd_idx, NULL);
         if (last_sector >= mddev->dev_sectors)
                 last_sector = mddev->dev_sectors - 1;
@@@ -4158,7 -3946,7 +4158,7 @@@
                 wait_event(conf->wait_for_overlap,
                            atomic_read(&conf->reshape_stripes) == 0);
                 mddev->reshape_position = conf->reshape_progress;
-               mddev->curr_resync_completed = mddev->curr_resync;
+               mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors;
                 conf->reshape_checkpoint = jiffies;
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
@@@ -4258,7 -4046,7 +4258,7 @@@ static inline sector_t sync_request(mdd
         spin_unlock(&sh->lock);
   
         /* wait for any blocked device to be handled */
- -      while(unlikely(!handle_stripe(sh, NULL)))
+ +      while (unlikely(!handle_stripe(sh)))
                 ;
         release_stripe(sh);
   
@@@ -4315,7 -4103,7 +4315,7 @@@ static int  retry_aligned_read(raid5_co
                         return handled;
                 }
   
- -              handle_stripe(sh, NULL);
+ +              handle_stripe(sh);
                 release_stripe(sh);
                 handled++;
         }
@@@ -4329,36 -4117,6 +4329,36 @@@
         return handled;
   }
   
+ +#ifdef CONFIG_MULTICORE_RAID456
+ +static void __process_stripe(void *param, async_cookie_t cookie)
+ +{
+ +      struct stripe_head *sh = param;
+ +      /* async callback body: 'cookie' is not used; handle the stripe, then release it */
+ +      handle_stripe(sh);
+ +      release_stripe(sh);
+ +}
+ +/* Queue the stripe on 'domain'; __process_stripe() will handle and release it. */
+ +static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+ +{
+ +      async_schedule_domain(__process_stripe, sh, domain);
+ +}
+ +/*
+ + * Calls async_synchronize_full_domain() on 'domain'.
+ + * NOTE(review): presumably this blocks until every stripe queued by
+ + * process_stripe() has been handled -- confirm against the async API.
+ + */
+ +static void synchronize_stripe_processing(struct list_head *domain)
+ +{
+ +      async_synchronize_full_domain(domain);
+ +}
+ +#else /* !CONFIG_MULTICORE_RAID456: handle each stripe inline; 'domain' is unused */
+ +static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+ +{
+ +      handle_stripe(sh);
+ +      release_stripe(sh);
+ +      cond_resched();
+ +}
+ +/* Stripes are handled inline in this build, so there is nothing to wait for. */
+ +static void synchronize_stripe_processing(struct list_head *domain)
+ +{
+ +}
+ +#endif /* CONFIG_MULTICORE_RAID456 */
   
   
   /*
@@@ -4371,9 -4129,8 +4371,9 @@@
   static void raid5d(mddev_t *mddev)
   {
         struct stripe_head *sh;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         int handled;
+ +      LIST_HEAD(raid_domain);
   
         pr_debug("+++ raid5d active\n");
   
@@@ -4410,7 -4167,8 +4410,7 @@@
                 spin_unlock_irq(&conf->device_lock);
                 
                 handled++;
- -              handle_stripe(sh, conf->spare_page);
- -              release_stripe(sh);
+ +              process_stripe(sh, &raid_domain);
   
                 spin_lock_irq(&conf->device_lock);
         }
@@@ -4418,7 -4176,6 +4418,7 @@@
   
         spin_unlock_irq(&conf->device_lock);
   
+ +      synchronize_stripe_processing(&raid_domain);
         async_tx_issue_pending_all();
         unplug_slaves(mddev);
   
@@@ -4428,7 -4185,7 +4428,7 @@@
   static ssize_t
   raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         if (conf)
                 return sprintf(page, "%d\n", conf->max_nr_stripes);
         else
@@@ -4438,7 -4195,7 +4438,7 @@@
   static ssize_t
   raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         unsigned long new;
         int err;
   
@@@ -4476,7 -4233,7 +4476,7 @@@ raid5_stripecache_size = __ATTR(stripe_
   static ssize_t
   raid5_show_preread_threshold(mddev_t *mddev, char *page)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         if (conf)
                 return sprintf(page, "%d\n", conf->bypass_threshold);
         else
@@@ -4486,7 -4243,7 +4486,7 @@@
   static ssize_t
   raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         unsigned long new;
         if (len >= PAGE_SIZE)
                 return -EINVAL;
@@@ -4510,7 -4267,7 +4510,7 @@@ raid5_preread_bypass_threshold = __ATTR
   static ssize_t
   stripe_cache_active_show(mddev_t *mddev, char *page)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         if (conf)
                 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
         else
@@@ -4534,7 -4291,7 +4534,7 @@@ static struct attribute_group raid5_att
   static sector_t
   raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
   
         if (!sectors)
                 sectors = mddev->dev_sectors;
@@@ -4546,123 -4303,11 +4546,123 @@@
                         raid_disks = conf->previous_raid_disks;
         }
   
-       sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
-       sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+       sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+       sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
         return sectors * (raid_disks - conf->max_degraded);
   }
   
+ +static void raid5_free_percpu(raid5_conf_t *conf)
+ +{
+ +      struct raid5_percpu *percpu;
+ +      unsigned long cpu;
+ +      /*
+ +       * Release each possible cpu's spare page and scribble buffer,
+ +       * unregister the hotplug notifier (CONFIG_HOTPLUG_CPU only) and
+ +       * finally free the per-cpu area itself.  Does nothing when
+ +       * conf->percpu is NULL.
+ +       */
+ +      if (!conf->percpu)
+ +              return;
+ +      /*
+ +       * NOTE(review): the loop below visits every possible cpu, including
+ +       * ones that may never have had buffers allocated; this relies on
+ +       * unset pointers being NULL and on safe_put_page()/kfree()
+ +       * accepting NULL -- confirm.
+ +       */
+ +      get_online_cpus();
+ +      for_each_possible_cpu(cpu) {
+ +              percpu = per_cpu_ptr(conf->percpu, cpu);
+ +              safe_put_page(percpu->spare_page);
+ +              kfree(percpu->scribble);
+ +      }
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      unregister_cpu_notifier(&conf->cpu_notify);
+ +#endif
+ +      put_online_cpus();
+ +
+ +      free_percpu(conf->percpu);
+ +}
+ +/*
+ + * Release everything hanging off 'conf' and then 'conf' itself: calls
+ + * shrink_stripes() and raid5_free_percpu(), then kfree()s conf->disks,
+ + * conf->stripe_hashtbl and the conf structure.  'conf' must not be
+ + * used after this returns.
+ + */
+ +static void free_conf(raid5_conf_t *conf)
+ +{
+ +      shrink_stripes(conf);
+ +      raid5_free_percpu(conf);
+ +      kfree(conf->disks);
+ +      kfree(conf->stripe_hashtbl);
+ +      kfree(conf);
+ +}
+ +
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +/*
+ + * CPU hotplug notifier for the per-cpu raid5 buffers.
+ + *
+ + * @nfb:    the notifier_block embedded in raid5_conf_t as 'cpu_notify'
+ + * @action: hotplug event being reported
+ + * @hcpu:   number of the affected cpu, passed as a pointer-sized integer
+ + *
+ + * CPU_UP_PREPARE[_FROZEN]: allocate the cpu's scribble buffer and, for
+ + * level 6 arrays, its spare page, unless they are already present.
+ + * CPU_DEAD[_FROZEN]: free both and clear the pointers.
+ + * All other events are ignored.
+ + *
+ + * Returns NOTIFY_BAD if an allocation fails, NOTIFY_OK otherwise.
+ + */
+ +static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+ +                            void *hcpu)
+ +{
+ +      raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+ +      long cpu = (long)hcpu;
+ +      struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+ +
+ +      switch (action) {
+ +      case CPU_UP_PREPARE:
+ +      case CPU_UP_PREPARE_FROZEN:
+ +              if (conf->level == 6 && !percpu->spare_page)
+ +                      percpu->spare_page = alloc_page(GFP_KERNEL);
+ +              if (!percpu->scribble)
+ +                      percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+ +
+ +              if (!percpu->scribble ||
+ +                  (conf->level == 6 && !percpu->spare_page)) {
+ +                      safe_put_page(percpu->spare_page);
+ +                      kfree(percpu->scribble);
+ +                      /*
+ +                       * Clear the pointers so that a later CPU_DEAD,
+ +                       * a retried CPU_UP_PREPARE or raid5_free_percpu()
+ +                       * does not free or reuse the released buffers.
+ +                       */
+ +                      percpu->spare_page = NULL;
+ +                      percpu->scribble = NULL;
+ +                      pr_err("%s: failed memory allocation for cpu%ld\n",
+ +                             __func__, cpu);
+ +                      return NOTIFY_BAD;
+ +              }
+ +              break;
+ +      case CPU_DEAD:
+ +      case CPU_DEAD_FROZEN:
+ +              safe_put_page(percpu->spare_page);
+ +              kfree(percpu->scribble);
+ +              percpu->spare_page = NULL;
+ +              percpu->scribble = NULL;
+ +              break;
+ +      default:
+ +              break;
+ +      }
+ +      return NOTIFY_OK;
+ +}
+ +#endif
+ +
+ +static int raid5_alloc_percpu(raid5_conf_t *conf)
+ +{
+ +      unsigned long cpu;
+ +      struct page *spare_page;
+ +      struct raid5_percpu *allcpus;
+ +      void *scribble;
+ +      int err;
+ +      /*
+ +       * Allocate the per-cpu area and, for every present cpu, a scribble
+ +       * buffer plus (level 6 only) a spare page; with CONFIG_HOTPLUG_CPU
+ +       * also register the hotplug notifier.  Reads conf->level and
+ +       * conf->raid_disks, so both must be set before calling.
+ +       *
+ +       * Returns 0 on success or a negative errno.  Nothing is freed
+ +       * here on failure: conf->percpu and any buffers already allocated
+ +       * stay attached to 'conf' for the caller to release.
+ +       */
+ +      allcpus = alloc_percpu(struct raid5_percpu);
+ +      if (!allcpus)
+ +              return -ENOMEM;
+ +      conf->percpu = allcpus;
+ +      /* allocate buffers and register the notifier between get/put_online_cpus() */
+ +      get_online_cpus();
+ +      err = 0;
+ +      for_each_present_cpu(cpu) {
+ +              if (conf->level == 6) {
+ +                      spare_page = alloc_page(GFP_KERNEL);
+ +                      if (!spare_page) {
+ +                              err = -ENOMEM;
+ +                              break;
+ +                      }
+ +                      per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+ +              }
+ +              scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+ +              if (!scribble) {
+ +                      err = -ENOMEM;
+ +                      break;
+ +              }
+ +              per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+ +      }
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      conf->cpu_notify.notifier_call = raid456_cpu_notify;
+ +      conf->cpu_notify.priority = 0;
+ +      if (err == 0)
+ +              err = register_cpu_notifier(&conf->cpu_notify);
+ +#endif
+ +      put_online_cpus();
+ +
+ +      return err;
+ +}
+ +
   static raid5_conf_t *setup_conf(mddev_t *mddev)
   {
         raid5_conf_t *conf;
@@@ -4691,9 -4336,11 +4691,11 @@@
                 return ERR_PTR(-EINVAL);
         }
   
-       if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+       if (!mddev->new_chunk_sectors ||
+           (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
+           !is_power_of_2(mddev->new_chunk_sectors)) {
                 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
-                       mddev->new_chunk, mdname(mddev));
+                      mddev->new_chunk_sectors << 9, mdname(mddev));
                 return ERR_PTR(-EINVAL);
         }
   
@@@ -4702,7 -4349,6 +4704,7 @@@
                 goto abort;
   
         conf->raid_disks = mddev->raid_disks;
+ +      conf->scribble_len = scribble_len(conf->raid_disks);
         if (mddev->reshape_position == MaxSector)
                 conf->previous_raid_disks = mddev->raid_disks;
         else
@@@ -4718,10 -4364,11 +4720,10 @@@
         if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                 goto abort;
   
- -      if (mddev->new_level == 6) {
- -              conf->spare_page = alloc_page(GFP_KERNEL);
- -              if (!conf->spare_page)
- -                      goto abort;
- -      }
+ +      conf->level = mddev->new_level;
+ +      if (raid5_alloc_percpu(conf) != 0)
+ +              goto abort;
+ +
         spin_lock_init(&conf->device_lock);
         init_waitqueue_head(&conf->wait_for_stripe);
         init_waitqueue_head(&conf->wait_for_overlap);
@@@ -4756,7 -4403,8 +4758,8 @@@
                         conf->fullsync = 1;
         }
   
-       conf->chunk_size = mddev->new_chunk;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
+       conf->level = mddev->new_level;
         if (conf->level == 6)
                 conf->max_degraded = 2;
         else
@@@ -4765,7 -4413,7 +4768,7 @@@
         conf->max_nr_stripes = NR_STRIPES;
         conf->reshape_progress = mddev->reshape_position;
         if (conf->reshape_progress != MaxSector) {
-               conf->prev_chunk = mddev->chunk_size;
+               conf->prev_chunk_sectors = mddev->chunk_sectors;
                 conf->prev_algo = mddev->layout;
         }
   
@@@ -4791,7 -4439,11 +4794,7 @@@
   
    abort:
         if (conf) {
- -              shrink_stripes(conf);
- -              safe_put_page(conf->spare_page);
- -              kfree(conf->disks);
- -              kfree(conf->stripe_hashtbl);
- -              kfree(conf);
+ +              free_conf(conf);
                 return ERR_PTR(-EIO);
         } else
                 return ERR_PTR(-ENOMEM);
@@@ -4803,6 -4455,10 +4806,10 @@@ static int run(mddev_t *mddev
         int working_disks = 0;
         mdk_rdev_t *rdev;
   
+       if (mddev->recovery_cp != MaxSector)
+               printk(KERN_NOTICE "raid5: %s is not clean"
+                      " -- starting background reconstruction\n",
+                      mdname(mddev));
         if (mddev->reshape_position != MaxSector) {
                 /* Check that we can continue the reshape.
                  * Currently only disks can change, it must
@@@ -4825,7 -4481,7 +4832,7 @@@
                  * geometry.
                  */
                 here_new = mddev->reshape_position;
-               if (sector_div(here_new, (mddev->new_chunk>>9)*
+               if (sector_div(here_new, mddev->new_chunk_sectors *
                                (mddev->raid_disks - max_degraded))) {
                         printk(KERN_ERR "raid5: reshape_position not "
                                "on a stripe boundary\n");
@@@ -4833,7 -4489,7 +4840,7 @@@
                 }
                 /* here_new is the stripe we will write to */
                 here_old = mddev->reshape_position;
-               sector_div(here_old, (mddev->chunk_size>>9)*
+               sector_div(here_old, mddev->chunk_sectors *
                            (old_disks-max_degraded));
                 /* here_old is the first stripe that we might need to read
                  * from */
@@@ -4848,7 -4504,7 +4855,7 @@@
         } else {
                 BUG_ON(mddev->level != mddev->new_level);
                 BUG_ON(mddev->layout != mddev->new_layout);
-               BUG_ON(mddev->chunk_size != mddev->new_chunk);
+               BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
                 BUG_ON(mddev->delta_disks != 0);
         }
   
@@@ -4882,7 -4538,7 +4889,7 @@@
         }
   
         /* device size must be a multiple of chunk size */
-       mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+       mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
         mddev->resync_max_sectors = mddev->dev_sectors;
   
         if (mddev->degraded > 0 &&
@@@ -4931,7 -4587,7 +4938,7 @@@
         {
                 int data_disks = conf->previous_raid_disks - conf->max_degraded;
                 int stripe = data_disks *
-                       (mddev->chunk_size / PAGE_SIZE);
+                       ((mddev->chunk_sectors << 9) / PAGE_SIZE);
                 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                         mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
         }
@@@ -4957,8 -4613,12 +4964,8 @@@ abort
         md_unregister_thread(mddev->thread);
         mddev->thread = NULL;
         if (conf) {
- -              shrink_stripes(conf);
                 print_raid5_conf(conf);
- -              safe_put_page(conf->spare_page);
- -              kfree(conf->disks);
- -              kfree(conf->stripe_hashtbl);
- -              kfree(conf);
+ +              free_conf(conf);
         }
         mddev->private = NULL;
         printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@@@ -4973,10 -4633,13 +4980,10 @@@ static int stop(mddev_t *mddev
   
         md_unregister_thread(mddev->thread);
         mddev->thread = NULL;
- -      shrink_stripes(conf);
- -      kfree(conf->stripe_hashtbl);
         mddev->queue->backing_dev_info.congested_fn = NULL;
         blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
         sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
- -      kfree(conf->disks);
- -      kfree(conf);
+ +      free_conf(conf);
         mddev->private = NULL;
         return 0;
   }
@@@ -5021,7 -4684,8 +5028,8 @@@ static void status(struct seq_file *seq
         raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
         int i;
   
-       seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
+       seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
+               mddev->chunk_sectors / 2, mddev->layout);
         seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
         for (i = 0; i < conf->raid_disks; i++)
                 seq_printf (seq, "%s",
@@@ -5169,7 -4833,7 +5177,7 @@@ static int raid5_resize(mddev_t *mddev
          * any io in the removed space completes, but it hardly seems
          * worth it.
          */
-       sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+       sectors &= ~((sector_t)mddev->chunk_sectors - 1);
         md_set_array_sectors(mddev, raid5_size(mddev, sectors,
                                                mddev->raid_disks));
         if (mddev->array_sectors >
@@@ -5186,14 -4850,37 +5194,37 @@@
         return 0;
   }
   
- static int raid5_check_reshape(mddev_t *mddev)
+ static int check_stripe_cache(mddev_t *mddev)
+ {
+       /* Can only proceed if there are plenty of stripe_heads.
+        * We need a minimum of one full stripe, and for sensible progress
+        * it is best to have about 4 times that.
+        * If we require 4 times, then the default 256 4K stripe_heads will
+        * allow for chunk sizes up to 256K, which is probably OK.
+        * If the chunk size is greater, user-space should request more
+        * stripe_heads first.
+        *
+        * Returns 1 if conf->max_nr_stripes is large enough for both the
+        * current and the new chunk size, 0 (after logging a warning) if not.
+        */
+       raid5_conf_t *conf = mddev->private;
+       if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+           > conf->max_nr_stripes ||
+           ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+           > conf->max_nr_stripes) {
+               printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
+                      ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
+                       / STRIPE_SIZE)*4);
+               return 0;
+       }
+       return 1;
+ }
+ 
+ static int check_reshape(mddev_t *mddev)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
   
         if (mddev->delta_disks == 0 &&
             mddev->new_layout == mddev->layout &&
-           mddev->new_chunk == mddev->chunk_size)
-               return -EINVAL; /* nothing to do */
+           mddev->new_chunk_sectors == mddev->chunk_sectors)
+               return 0; /* nothing to do */
         if (mddev->bitmap)
                 /* Cannot grow a bitmap yet */
                 return -EBUSY;
@@@ -5212,28 -4899,15 +5243,15 @@@
                         return -EINVAL;
         }
   
-       /* Can only proceed if there are plenty of stripe_heads.
-        * We need a minimum of one full stripe,, and for sensible progress
-        * it is best to have about 4 times that.
-        * If we require 4 times, then the default 256 4K stripe_heads will
-        * allow for chunk sizes up to 256K, which is probably OK.
-        * If the chunk size is greater, user-space should request more
-        * stripe_heads first.
-        */
-       if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
-           (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
-               printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
-                      (max(mddev->chunk_size, mddev->new_chunk)
-                       / STRIPE_SIZE)*4);
+       if (!check_stripe_cache(mddev))
                 return -ENOSPC;
-       }
   
         return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
   }
   
   static int raid5_start_reshape(mddev_t *mddev)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
         mdk_rdev_t *rdev;
         int spares = 0;
         int added_devices = 0;
@@@ -5242,6 -4916,9 +5260,9 @@@
         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 return -EBUSY;
   
+       if (!check_stripe_cache(mddev))
+               return -ENOSPC;
+ 
         list_for_each_entry(rdev, &mddev->disks, same_set)
                 if (rdev->raid_disk < 0 &&
                     !test_bit(Faulty, &rdev->flags))
@@@ -5268,8 -4945,8 +5289,8 @@@
         spin_lock_irq(&conf->device_lock);
         conf->previous_raid_disks = conf->raid_disks;
         conf->raid_disks += mddev->delta_disks;
-       conf->prev_chunk = conf->chunk_size;
-       conf->chunk_size = mddev->new_chunk;
+       conf->prev_chunk_sectors = conf->chunk_sectors;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
         conf->prev_algo = conf->algorithm;
         conf->algorithm = mddev->new_layout;
         if (mddev->delta_disks < 0)
@@@ -5351,7 -5028,7 +5372,7 @@@ static void end_reshape(raid5_conf_t *c
                  */
                 {
                         int data_disks = conf->raid_disks - conf->max_degraded;
-                       int stripe = data_disks * (conf->chunk_size
+                       int stripe = data_disks * ((conf->chunk_sectors << 9)
                                                    / PAGE_SIZE);
                         if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                                 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
@@@ -5365,7 -5042,7 +5386,7 @@@
   static void raid5_finish_reshape(mddev_t *mddev)
   {
         struct block_device *bdev;
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
   
         if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
   
@@@ -5396,7 -5073,7 +5417,7 @@@
                                 raid5_remove_disk(mddev, d);
                 }
                 mddev->layout = conf->algorithm;
-               mddev->chunk_size = conf->chunk_size;
+               mddev->chunk_sectors = conf->chunk_sectors;
                 mddev->reshape_position = MaxSector;
                 mddev->delta_disks = 0;
         }
@@@ -5404,7 -5081,7 +5425,7 @@@
   
   static void raid5_quiesce(mddev_t *mddev, int state)
   {
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
   
         switch(state) {
         case 2: /* resume for a suspend */
@@@ -5454,7 -5131,7 +5475,7 @@@ static void *raid5_takeover_raid1(mddev
   
         mddev->new_level = 5;
         mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
-       mddev->new_chunk = chunksect << 9;
+       mddev->new_chunk_sectors = chunksect;
   
         return setup_conf(mddev);
   }
@@@ -5493,24 -5170,24 +5514,24 @@@ static void *raid5_takeover_raid6(mddev
   }
   
   
- static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+ static int raid5_check_reshape(mddev_t *mddev)
   {
         /* For a 2-drive array, the layout and chunk size can be changed
          * immediately as no restriping is needed.
          * For larger arrays we record the new value - after validation
          * to be used by a reshape pass.
          */
-       raid5_conf_t *conf = mddev_to_conf(mddev);
+       raid5_conf_t *conf = mddev->private;
+       int new_chunk = mddev->new_chunk_sectors;
   
-       if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+       if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
                 return -EINVAL;
         if (new_chunk > 0) {
-               if (new_chunk & (new_chunk-1))
-                       /* not a power of 2 */
+               if (!is_power_of_2(new_chunk))
                         return -EINVAL;
-               if (new_chunk < PAGE_SIZE)
+               if (new_chunk < (PAGE_SIZE>>9))
                         return -EINVAL;
-               if (mddev->array_sectors & ((new_chunk>>9)-1))
+               if (mddev->array_sectors & (new_chunk-1))
                         /* not factor of array size */
                         return -EINVAL;
         }
@@@ -5518,49 -5195,39 +5539,39 @@@
         /* They look valid */
   
         if (mddev->raid_disks == 2) {
- 
-               if (new_layout >= 0) {
-                       conf->algorithm = new_layout;
-                       mddev->layout = mddev->new_layout = new_layout;
+               /* can make the change immediately */
+               if (mddev->new_layout >= 0) {
+                       conf->algorithm = mddev->new_layout;
+                       mddev->layout = mddev->new_layout;
                 }
                 if (new_chunk > 0) {
-                       conf->chunk_size = new_chunk;
-                       mddev->chunk_size = mddev->new_chunk = new_chunk;
+                       conf->chunk_sectors = new_chunk ;
+                       mddev->chunk_sectors = new_chunk;
                 }
                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 md_wakeup_thread(mddev->thread);
-       } else {
-               if (new_layout >= 0)
-                       mddev->new_layout = new_layout;
-               if (new_chunk > 0)
-                       mddev->new_chunk = new_chunk;
         }
-       return 0;
+       return check_reshape(mddev);
   }
   
- static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+ static int raid6_check_reshape(mddev_t *mddev)
   {
-       if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+       int new_chunk = mddev->new_chunk_sectors;
+ 
+       if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
                 return -EINVAL;
         if (new_chunk > 0) {
-               if (new_chunk & (new_chunk-1))
-                       /* not a power of 2 */
+               if (!is_power_of_2(new_chunk))
                         return -EINVAL;
-               if (new_chunk < PAGE_SIZE)
+               if (new_chunk < (PAGE_SIZE >> 9))
                         return -EINVAL;
-               if (mddev->array_sectors & ((new_chunk>>9)-1))
+               if (mddev->array_sectors & (new_chunk-1))
                         /* not factor of array size */
                         return -EINVAL;
         }
   
         /* They look valid */
- 
-       if (new_layout >= 0)
-               mddev->new_layout = new_layout;
-       if (new_chunk > 0)
-               mddev->new_chunk = new_chunk;
- 
-       return 0;
+       return check_reshape(mddev);
   }
   
   static void *raid5_takeover(mddev_t *mddev)
@@@ -5570,8 -5237,6 +5581,6 @@@
          *  raid1 - if there are two drives.  We need to know the chunk size
          *  raid4 - trivial - just use a raid4 layout.
          *  raid6 - Providing it is a *_6 layout
-        *
-        * For now, just do raid1
          */
   
         if (mddev->level == 1)
@@@ -5653,12 -5318,11 +5662,11 @@@ static struct mdk_personality raid6_per
         .sync_request   = sync_request,
         .resize         = raid5_resize,
         .size           = raid5_size,
-       .check_reshape  = raid5_check_reshape,
+       .check_reshape  = raid6_check_reshape,
         .start_reshape  = raid5_start_reshape,
         .finish_reshape = raid5_finish_reshape,
         .quiesce        = raid5_quiesce,
         .takeover       = raid6_takeover,
-       .reconfig       = raid6_reconfig,
   };
   static struct mdk_personality raid5_personality =
   {
@@@ -5681,7 -5345,6 +5689,6 @@@
         .finish_reshape = raid5_finish_reshape,
         .quiesce        = raid5_quiesce,
         .takeover       = raid5_takeover,
-       .reconfig       = raid5_reconfig,
   };
   
   static struct mdk_personality raid4_personality =
diff --combined drivers/md/raid5.h

index 116d0b44b2a9ea9f7b302c38d28c05643006f249,9459689c4ea00a50073cdcfca38ee038bda10364..2390e0e83daf7c939344f8062a9d4a6ef737c476
--- 1/drivers/md/raid5.h
--- 2/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@@ -2,7 -2,6 +2,7 @@@
   #define _RAID5_H
   
   #include <linux/raid/xor.h>
+ +#include <linux/dmaengine.h>
   
   /*
    *
@@@ -176,9 -175,7 +176,9 @@@
    */
   enum check_states {
         check_state_idle = 0,
- -      check_state_run, /* parity check */
+ +      check_state_run, /* xor parity check */
+ +      check_state_run_q, /* q-parity check */
+ +      check_state_run_pq, /* pq dual parity check */
         check_state_check_result,
         check_state_compute_run, /* parity repair */
         check_state_compute_result,
@@@ -218,8 -215,8 +218,8 @@@ struct stripe_head 
          * @target - STRIPE_OP_COMPUTE_BLK target
          */
         struct stripe_operations {
- -              int                target;
- -              u32                zero_sum_result;
+ +              int                  target, target2;
+ +              enum sum_check_flags zero_sum_result;
         } ops;
         struct r5dev {
                 struct bio      req;
@@@ -301,7 -298,7 +301,7 @@@ struct r6_state 
   #define STRIPE_OP_COMPUTE_BLK 1
   #define STRIPE_OP_PREXOR      2
   #define STRIPE_OP_BIODRAIN    3
- -#define STRIPE_OP_POSTXOR     4
+ +#define STRIPE_OP_RECONSTRUCT 4
   #define STRIPE_OP_CHECK       5
   
   /*
@@@ -337,7 -334,8 +337,8 @@@ struct raid5_private_data 
         struct hlist_head       *stripe_hashtbl;
         mddev_t                 *mddev;
         struct disk_info        *spare;
-       int                     chunk_size, level, algorithm;
+       int                     chunk_sectors;
+       int                     level, algorithm;
         int                     max_degraded;
         int                     raid_disks;
         int                     max_nr_stripes;
@@@ -353,7 -351,8 +354,8 @@@
          */
         sector_t                reshape_safe;
         int                     previous_raid_disks;
-       int                     prev_chunk, prev_algo;
+       int                     prev_chunk_sectors;
+       int                     prev_algo;
         short                   generation; /* increments with every reshape */
         unsigned long           reshape_checkpoint; /* Time we last updated
                                                      * metadata */
@@@ -386,21 -385,8 +388,21 @@@
                                             * (fresh device added).
                                             * Cleared when a sync completes.
                                             */
- -
- -      struct page             *spare_page; /* Used when checking P/Q in raid6 */
+ +      /* per cpu variables */
+ +      struct raid5_percpu {
+ +              struct page     *spare_page; /* Used when checking P/Q in raid6 */
+ +              void            *scribble;   /* space for constructing buffer
+ +                                            * lists and performing address
+ +                                            * conversions
+ +                                            */
+ +      } *percpu;
+ +      size_t                  scribble_len; /* size of scribble region must be
+ +                                             * associated with conf to handle
+ +                                             * cpu hotplug while reshaping
+ +                                             */
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      struct notifier_block   cpu_notify;
+ +#endif
   
         /*
          * Free stripes pool
@@@ -424,8 -410,6 +426,6 @@@
   
   typedef struct raid5_private_data raid5_conf_t;
   
- #define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private)
- 
   /*
    * Our supported algorithms
    */
diff --combined include/linux/dmaengine.h

index 835b9c7bf1c26c61c908b2a00324d4ff27956808,f114bc7790bc46cfefbb8bd7417ebd35e7eea77f..2b9f2ac7ed60f0e8c61e79f2f7791545f877e1ef
--- 1/include/linux/dmaengine.h
--- 2/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@@ -48,20 -48,19 +48,20 @@@ enum dma_status 
   
   /**
    * enum dma_transaction_type - DMA transaction types/indexes
+ + *
+ + * Note: The DMA_ASYNC_TX capability is not to be set by drivers.  It is
+ + * automatically set as dma devices are registered.
    */
   enum dma_transaction_type {
         DMA_MEMCPY,
         DMA_XOR,
- -      DMA_PQ_XOR,
- -      DMA_DUAL_XOR,
- -      DMA_PQ_UPDATE,
- -      DMA_ZERO_SUM,
- -      DMA_PQ_ZERO_SUM,
+ +      DMA_PQ,
+ +      DMA_XOR_VAL,
+ +      DMA_PQ_VAL,
         DMA_MEMSET,
- -      DMA_MEMCPY_CRC32C,
         DMA_INTERRUPT,
         DMA_PRIVATE,
+ +      DMA_ASYNC_TX,
         DMA_SLAVE,
   };
   
@@@ -71,25 -70,18 +71,25 @@@
   
   /**
    * enum dma_ctrl_flags - DMA flags to augment operation preparation,
- - *    control completion, and communicate status.
+ + *  control completion, and communicate status.
    * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- - *    this transaction
+ + *  this transaction
    * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- - *    acknowledges receipt, i.e. has has a chance to establish any
- - *    dependency chains
+ + *  acknowledges receipt, i.e. has had a chance to establish any dependency
+ + *  chains
    * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
    * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
    * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
    *    (if not set, do the source dma-unmapping as page)
    * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
    *    (if not set, do the destination dma-unmapping as page)
+ + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ + *  sources that were the result of a previous operation, in the case of a PQ
+ + *  operation it continues the calculation with new sources
+ + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ + *  on the result of this operation
    */
   enum dma_ctrl_flags {
         DMA_PREP_INTERRUPT = (1 << 0),
@@@ -98,31 -90,8 +98,31 @@@
         DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
         DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
         DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
+ +      DMA_PREP_PQ_DISABLE_P = (1 << 6),
+ +      DMA_PREP_PQ_DISABLE_Q = (1 << 7),
+ +      DMA_PREP_CONTINUE = (1 << 8),
+ +      DMA_PREP_FENCE = (1 << 9),
   };
   
+ +/**
+ + * enum sum_check_bits - bit positions of sum_check_flags
+ + */
+ +enum sum_check_bits {
+ +      SUM_CHECK_P = 0,
+ +      SUM_CHECK_Q = 1,
+ +};
+ +
+ +/**
+ + * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
+ + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ + */
+ +enum sum_check_flags {
+ +      SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+ +      SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+ +};
+ +
+ +
   /**
    * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
    * See linux/cpumask.h
@@@ -211,8 -180,6 +211,6 @@@ typedef void (*dma_async_tx_callback)(v
    * @flags: flags to augment operation preparation, control completion, and
    *    communicate status
    * @phys: physical address of the descriptor
-  * @tx_list: driver common field for operations that require multiple
-  *    descriptors
    * @chan: target channel for this operation
    * @tx_submit: set the prepared descriptor(s) to be executed by the engine
    * @callback: routine to call after this operation is complete
@@@ -226,7 -193,6 +224,6 @@@ struct dma_async_tx_descriptor 
         dma_cookie_t cookie;
         enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
         dma_addr_t phys;
-       struct list_head tx_list;
         struct dma_chan *chan;
         dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
         dma_async_tx_callback callback;
@@@ -244,11 -210,6 +241,11 @@@
    * @global_node: list_head for global dma_device_list
    * @cap_mask: one or more dma_capability flags
    * @max_xor: maximum number of xor sources, 0 if no capability
+ + * @max_pq: maximum number of PQ sources and PQ-continue capability
+ + * @copy_align: alignment shift for memcpy operations
+ + * @xor_align: alignment shift for xor operations
+ + * @pq_align: alignment shift for pq operations
+ + * @fill_align: alignment shift for memset operations
    * @dev_id: unique device ID
    * @dev: struct device reference for dma mapping api
    * @device_alloc_chan_resources: allocate resources and return the
@@@ -256,9 -217,7 +253,9 @@@
    * @device_free_chan_resources: release DMA channel's resources
    * @device_prep_dma_memcpy: prepares a memcpy operation
    * @device_prep_dma_xor: prepares a xor operation
- - * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ + * @device_prep_dma_xor_val: prepares a xor validation operation
+ + * @device_prep_dma_pq: prepares a pq operation
+ + * @device_prep_dma_pq_val: prepares a pq validation (zero_sum) operation
    * @device_prep_dma_memset: prepares a memset operation
    * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
    * @device_prep_slave_sg: prepares a slave dma operation
@@@ -273,13 -232,7 +270,13 @@@ struct dma_device 
         struct list_head channels;
         struct list_head global_node;
         dma_cap_mask_t  cap_mask;
- -      int max_xor;
+ +      unsigned short max_xor;
+ +      unsigned short max_pq;
+ +      u8 copy_align;
+ +      u8 xor_align;
+ +      u8 pq_align;
+ +      u8 fill_align;
+ +      #define DMA_HAS_PQ_CONTINUE (1 << 15)
   
         int dev_id;
         struct device *dev;
@@@ -293,17 -246,9 +290,17 @@@
         struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
                 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
                 unsigned int src_cnt, size_t len, unsigned long flags);
- -      struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+ +      struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
                 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
- -              size_t len, u32 *result, unsigned long flags);
+ +              size_t len, enum sum_check_flags *result, unsigned long flags);
+ +      struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+ +              struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ +              unsigned int src_cnt, const unsigned char *scf,
+ +              size_t len, unsigned long flags);
+ +      struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+ +              struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ +              unsigned int src_cnt, const unsigned char *scf, size_t len,
+ +              enum sum_check_flags *pqres, unsigned long flags);
         struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
                 struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
                 unsigned long flags);
@@@ -322,96 -267,6 +319,96 @@@
         void (*device_issue_pending)(struct dma_chan *chan);
   };
   
+ +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
+ +{
+ +      size_t mask;
+ +
+ +      if (!align)
+ +              return true;
+ +      mask = (1 << align) - 1;
+ +      if (mask & (off1 | off2 | len))
+ +              return false;
+ +      return true;
+ +}
+ +
+ +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+ +                                     size_t off2, size_t len)
+ +{
+ +      return dmaengine_check_align(dev->copy_align, off1, off2, len);
+ +}
+ +
+ +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+ +                                    size_t off2, size_t len)
+ +{
+ +      return dmaengine_check_align(dev->xor_align, off1, off2, len);
+ +}
+ +
+ +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+ +                                   size_t off2, size_t len)
+ +{
+ +      return dmaengine_check_align(dev->pq_align, off1, off2, len);
+ +}
+ +
+ +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+ +                                     size_t off2, size_t len)
+ +{
+ +      return dmaengine_check_align(dev->fill_align, off1, off2, len);
+ +}
+ +
+ +static inline void
+ +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+ +{
+ +      dma->max_pq = maxpq;
+ +      if (has_pq_continue)
+ +              dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+ +}
+ +
+ +static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+ +{
+ +      return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+ +}
+ +
+ +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+ +{
+ +      enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+ +
+ +      return (flags & mask) == mask;
+ +}
+ +
+ +static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+ +{
+ +      return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+ +}
+ +
+ +static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+ +{
+ +      return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+ +}
+ +
+ +/* dma_maxpq - reduce maxpq in the face of continued operations
+ + * @dma - dma device with PQ capability
+ + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ + *
+ + * When an engine does not support native continuation we need 3 extra
+ + * source slots to reuse P and Q with the following coefficients:
+ + * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ + * 2/ {01} * Q : use Q to continue Q' calculation
+ + * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ + *
+ + * In the case where P is disabled we only need 1 extra source:
+ + * 1/ {01} * Q : use Q to continue Q' calculation
+ + */
+ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+ +{
+ +      if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+ +              return dma_dev_to_maxpq(dma);
+ +      else if (dmaf_p_disabled_continue(flags))
+ +              return dma_dev_to_maxpq(dma) - 1;
+ +      else if (dmaf_continue(flags))
+ +              return dma_dev_to_maxpq(dma) - 3;
+ +      BUG();
+ +}
+ +
   /* --- public DMA engine API --- */
   
   #ifdef CONFIG_DMA_ENGINE
@@@ -441,11 -296,7 +438,11 @@@ static inline void net_dmaengine_put(vo
   #ifdef CONFIG_ASYNC_TX_DMA
   #define async_dmaengine_get() dmaengine_get()
   #define async_dmaengine_put() dmaengine_put()
+ +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+ +#else
   #define async_dma_find_channel(type) dma_find_channel(type)
+ +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
   #else
   static inline void async_dmaengine_get(void)
   {
@@@ -458,7 -309,7 +455,7 @@@ async_dma_find_channel(enum dma_transac
   {
         return NULL;
   }
- -#endif
+ +#endif /* CONFIG_ASYNC_TX_DMA */
   
   dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
         void *dest, void *src, size_t len);
diff --combined include/linux/pci_ids.h

index 2b4b8ce532564677f01c1705f2c54cab407ec53b,a3b0003657955da3651542059572387fda381308..bbeb13ceb8e8def5b9e46c09e3764dad1f7d8c18
--- 1/include/linux/pci_ids.h
--- 2/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@@ -104,6 -104,7 +104,7 @@@
   #define PCI_CLASS_SERIAL_USB_UHCI     0x0c0300
   #define PCI_CLASS_SERIAL_USB_OHCI     0x0c0310
   #define PCI_CLASS_SERIAL_USB_EHCI     0x0c0320
+ #define PCI_CLASS_SERIAL_USB_XHCI     0x0c0330
   #define PCI_CLASS_SERIAL_FIBER                0x0c04
   #define PCI_CLASS_SERIAL_SMBUS                0x0c05
   
@@@ -1005,6 -1006,7 +1006,7 @@@
   #define PCI_DEVICE_ID_PLX_PCI200SYN   0x3196
   #define PCI_DEVICE_ID_PLX_9030          0x9030
   #define PCI_DEVICE_ID_PLX_9050                0x9050
+ #define PCI_DEVICE_ID_PLX_9056                0x9056
   #define PCI_DEVICE_ID_PLX_9080                0x9080
   #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2        0xa001
   
@@@ -1066,8 -1068,6 +1068,6 @@@
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS       0x0034
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE 0x0035
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA        0x0036
- #define PCI_DEVICE_ID_NVIDIA_NVENET_10                0x0037
- #define PCI_DEVICE_ID_NVIDIA_NVENET_11                0x0038
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2       0x003e
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_ULTRA 0x0040
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800       0x0041
@@@ -1078,21 -1078,16 +1078,16 @@@
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE 0x0053
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA        0x0054
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2       0x0055
- #define PCI_DEVICE_ID_NVIDIA_NVENET_8         0x0056
- #define PCI_DEVICE_ID_NVIDIA_NVENET_9         0x0057
   #define PCI_DEVICE_ID_NVIDIA_CK804_AUDIO      0x0059
   #define PCI_DEVICE_ID_NVIDIA_CK804_PCIE               0x005d
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2_SMBUS    0x0064
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE      0x0065
- #define PCI_DEVICE_ID_NVIDIA_NVENET_2         0x0066
   #define PCI_DEVICE_ID_NVIDIA_MCP2_MODEM               0x0069
   #define PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO               0x006a
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SMBUS   0x0084
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE     0x0085
- #define PCI_DEVICE_ID_NVIDIA_NVENET_4         0x0086
   #define PCI_DEVICE_ID_NVIDIA_MCP2S_MODEM      0x0089
   #define PCI_DEVICE_ID_NVIDIA_CK8_AUDIO                0x008a
- #define PCI_DEVICE_ID_NVIDIA_NVENET_5         0x008c
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA    0x008e
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GT   0x0090
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GTX 0x0091
@@@ -1108,15 -1103,12 +1103,12 @@@
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3          0x00d1
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS    0x00d4
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE      0x00d5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_3         0x00d6
   #define PCI_DEVICE_ID_NVIDIA_MCP3_MODEM               0x00d9
   #define PCI_DEVICE_ID_NVIDIA_MCP3_AUDIO               0x00da
- #define PCI_DEVICE_ID_NVIDIA_NVENET_7         0x00df
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3S         0x00e1
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA    0x00e3
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS   0x00e4
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE     0x00e5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_6         0x00e6
   #define PCI_DEVICE_ID_NVIDIA_CK8S_AUDIO               0x00ea
   #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2   0x00ee
   #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1 0x00f0
@@@ -1176,7 -1168,6 +1168,6 @@@
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_SMBUS     0x01b4
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE               0x01bc
   #define PCI_DEVICE_ID_NVIDIA_MCP1_MODEM               0x01c1
- #define PCI_DEVICE_ID_NVIDIA_NVENET_1         0x01c3
   #define PCI_DEVICE_ID_NVIDIA_NFORCE2          0x01e0
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE3         0x0200
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1               0x0201
@@@ -1199,8 -1190,6 +1190,6 @@@
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA        0x037E
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2       0x037F
- #define PCI_DEVICE_ID_NVIDIA_NVENET_12                0x0268
- #define PCI_DEVICE_ID_NVIDIA_NVENET_13                0x0269
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800 0x0280
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X    0x0281
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE     0x0282
@@@ -1247,46 -1236,21 +1236,21 @@@
   #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2    0x0348
   #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000       0x034C
   #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100         0x034E
- #define PCI_DEVICE_ID_NVIDIA_NVENET_14              0x0372
   #define PCI_DEVICE_ID_NVIDIA_NVENET_15              0x0373
- #define PCI_DEVICE_ID_NVIDIA_NVENET_16              0x03E5
- #define PCI_DEVICE_ID_NVIDIA_NVENET_17              0x03E6
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA      0x03E7
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SMBUS           0x03EB
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE       0x03EC
- #define PCI_DEVICE_ID_NVIDIA_NVENET_18              0x03EE
- #define PCI_DEVICE_ID_NVIDIA_NVENET_19              0x03EF
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2     0x03F6
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3     0x03F7
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_SMBUS           0x0446
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE     0x0448
- #define PCI_DEVICE_ID_NVIDIA_NVENET_20              0x0450
- #define PCI_DEVICE_ID_NVIDIA_NVENET_21              0x0451
- #define PCI_DEVICE_ID_NVIDIA_NVENET_22              0x0452
- #define PCI_DEVICE_ID_NVIDIA_NVENET_23              0x0453
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_SMBUS     0x0542
- #define PCI_DEVICE_ID_NVIDIA_NVENET_24              0x054C
- #define PCI_DEVICE_ID_NVIDIA_NVENET_25              0x054D
- #define PCI_DEVICE_ID_NVIDIA_NVENET_26              0x054E
- #define PCI_DEVICE_ID_NVIDIA_NVENET_27              0x054F
- #define PCI_DEVICE_ID_NVIDIA_NVENET_28              0x07DC
- #define PCI_DEVICE_ID_NVIDIA_NVENET_29              0x07DD
- #define PCI_DEVICE_ID_NVIDIA_NVENET_30              0x07DE
- #define PCI_DEVICE_ID_NVIDIA_NVENET_31              0x07DF
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE       0x0560
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE       0x056C
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS    0x0752
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE       0x0759
- #define PCI_DEVICE_ID_NVIDIA_NVENET_32              0x0760
- #define PCI_DEVICE_ID_NVIDIA_NVENET_33              0x0761
- #define PCI_DEVICE_ID_NVIDIA_NVENET_34              0x0762
- #define PCI_DEVICE_ID_NVIDIA_NVENET_35              0x0763
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS     0x07D8
   #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS     0x0AA2
- #define PCI_DEVICE_ID_NVIDIA_NVENET_36              0x0AB0
- #define PCI_DEVICE_ID_NVIDIA_NVENET_37              0x0AB1
- #define PCI_DEVICE_ID_NVIDIA_NVENET_38              0x0AB2
- #define PCI_DEVICE_ID_NVIDIA_NVENET_39              0x0AB3
   
   #define PCI_VENDOR_ID_IMS             0x10e0
   #define PCI_DEVICE_ID_IMS_TT128               0x9128
@@@ -1314,6 -1278,13 +1278,13 @@@
   
   #define PCI_VENDOR_ID_CREATIVE                0x1102 /* duplicate: ECTIVA */
   #define PCI_DEVICE_ID_CREATIVE_EMU10K1        0x0002
+ #define PCI_DEVICE_ID_CREATIVE_20K1   0x0005
+ #define PCI_DEVICE_ID_CREATIVE_20K2   0x000b
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB0760      0x0024
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08801     0x0041
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08802     0x0042
+ #define PCI_SUBDEVICE_ID_CREATIVE_SB08803     0x0043
+ #define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX     0x6000
   
   #define PCI_VENDOR_ID_ECTIVA          0x1102 /* duplicate: CREATIVE */
   #define PCI_DEVICE_ID_ECTIVA_EV1938   0x8938
@@@ -1847,6 -1818,10 +1818,10 @@@
   #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO      0x0107
   #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2     0x0108
   
+ #define PCI_VENDOR_ID_DIGIGRAM                0x1369
+ #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM   0xc001
+ #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM       0xc002
+ 
   #define PCI_VENDOR_ID_KAWASAKI                0x136b
   #define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01
   
@@@ -1914,6 -1889,8 +1889,8 @@@
   #define PCI_SUBDEVICE_ID_CCD_SWYX4S   0xB540
   #define PCI_SUBDEVICE_ID_CCD_JH4S20   0xB550
   #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1 0xB552
+ #define PCI_SUBDEVICE_ID_CCD_JHSE1    0xB553
+ #define PCI_SUBDEVICE_ID_CCD_JH8S     0xB55B
   #define PCI_SUBDEVICE_ID_CCD_BN4S     0xB560
   #define PCI_SUBDEVICE_ID_CCD_BN8S     0xB562
   #define PCI_SUBDEVICE_ID_CCD_BNE1     0xB563
@@@ -1996,10 -1973,12 +1973,12 @@@
   #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U      0xC118
   #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU     0xC11C
   #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
+ #define PCI_DEVICE_ID_OXSEMI_C950     0x950B
   #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
   #define PCI_DEVICE_ID_OXSEMI_16PCI954PP       0x9513
   #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
   #define PCI_DEVICE_ID_OXSEMI_16PCI952PP       0x9523
+ #define PCI_SUBDEVICE_ID_OXSEMI_C950  0x0001
   
   #define PCI_VENDOR_ID_CHELSIO         0x1425
   
@@@ -2113,6 -2092,7 +2092,7 @@@
   #define PCI_VENDOR_ID_MAINPINE                0x1522
   #define PCI_DEVICE_ID_MAINPINE_PBRIDGE        0x0100
   #define PCI_VENDOR_ID_ENE             0x1524
+ #define PCI_DEVICE_ID_ENE_CB710_FLASH 0x0510
   #define PCI_DEVICE_ID_ENE_CB712_SD    0x0550
   #define PCI_DEVICE_ID_ENE_CB712_SD_2  0x0551
   #define PCI_DEVICE_ID_ENE_CB714_SD    0x0750
@@@ -2274,6 -2254,8 +2254,8 @@@
   #define PCI_DEVICE_ID_MPC8547E                0x0018
   #define PCI_DEVICE_ID_MPC8545E                0x0019
   #define PCI_DEVICE_ID_MPC8545         0x001a
+ #define PCI_DEVICE_ID_MPC8569E                0x0061
+ #define PCI_DEVICE_ID_MPC8569         0x0060
   #define PCI_DEVICE_ID_MPC8568E                0x0020
   #define PCI_DEVICE_ID_MPC8568         0x0021
   #define PCI_DEVICE_ID_MPC8567E                0x0022
@@@ -2286,6 -2268,8 +2268,8 @@@
   #define PCI_DEVICE_ID_MPC8572         0x0041
   #define PCI_DEVICE_ID_MPC8536E                0x0050
   #define PCI_DEVICE_ID_MPC8536         0x0051
+ #define PCI_DEVICE_ID_P2020E          0x0070
+ #define PCI_DEVICE_ID_P2020           0x0071
   #define PCI_DEVICE_ID_MPC8641         0x7010
   #define PCI_DEVICE_ID_MPC8641D                0x7011
   #define PCI_DEVICE_ID_MPC8610         0x7018
@@@ -2313,6 -2297,8 +2297,8 @@@
   
   #define PCI_VENDOR_ID_QMI             0x1a32
   
+ #define PCI_VENDOR_ID_AZWAVE          0x1a3b
+ 
   #define PCI_VENDOR_ID_TEKRAM          0x1de1
   #define PCI_DEVICE_ID_TEKRAM_DC290    0xdc29
   
@@@ -2529,16 -2515,6 +2515,16 @@@
   #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
   #define PCI_DEVICE_ID_INTEL_IOAT_CNB  0x360b
   #define PCI_DEVICE_ID_INTEL_FBD_CNB   0x360c
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
+ +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
   #define PCI_DEVICE_ID_INTEL_ICH10_0   0x3a14
   #define PCI_DEVICE_ID_INTEL_ICH10_1   0x3a16
   #define PCI_DEVICE_ID_INTEL_ICH10_2   0x3a18
author	Dan Williams <dan.j.williams@intel.com>
	Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
committer	Dan Williams <dan.j.williams@intel.com>
	Wed, 9 Sep 2009 00:55:21 +0000 (17:55 -0700)
		1	2
arch/arm/include/asm/hardware/iop_adma.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/dmaengine.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/dmatest.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/ioat/dma.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/ioat/dma.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/ioat/dma_v2.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/ioat/dma_v2.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/ioat/pci.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/iop-adma.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid5.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid5.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/dmaengine.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/pci_ids.h	patch \|	diff1 \|	diff2 \|	blob \| history