]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
usb: ehci: make HC see up-to-date qh/qtd descriptor ASAP
author: Ming Lei <ming.lei@canonical.com>
Tue, 30 Aug 2011 16:03:13 +0000 (00:03 +0800)
committer: Lothar Waßmann <LW@KARO-electronics.de>
Fri, 24 May 2013 06:33:10 +0000 (08:33 +0200)
This patch introduces the ehci_sync_mem helper to flush qtd/qh
descriptors to memory immediately on some ARM platforms, so that
the HC can see the up-to-date qtd/qh descriptors ASAP.

This patch fixes a performance bug on the ARM Cortex-A9 dual-core
platform, which has been reported on quite a few ARM machines
(OMAP4, Tegra 2, Snowball...); for details see
https://bugs.launchpad.net/bugs/709245.

The patch has been tested successfully on an OMAP4 Panda A1 board;
the throughput of 'dd' over USB mass storage increases from
4~5MB/sec to 14~16MB/sec after applying this patch.

Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
drivers/usb/host/ehci-q.c
drivers/usb/host/ehci.h

index 2499b3bce360101b9cc91745ee8bde707961681f..e4dd26a8b2b14f6529f5f6e8c39908ef7059bab2 100644 (file)
@@ -995,6 +995,12 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        head->qh_next.qh = qh;
        head->hw->hw_next = dma;
 
+       /*
+        * flush qh descriptor into memory immediately,
+        * see comments in qh_append_tds.
+        * */
+       ehci_sync_mem();
+
        qh_get(qh);
        qh->xacterrs = 0;
        qh->qh_state = QH_STATE_LINKED;
@@ -1082,6 +1088,18 @@ static struct ehci_qh *qh_append_tds (
                        wmb ();
                        dummy->hw_token = token;
 
+                       /*
+                        * Writing to dma coherent buffer on ARM may
+                        * be delayed to reach memory, so HC may not see
+                        * hw_token of dummy qtd in time, which can cause
+                        * the qtd transaction to be executed very late,
+                        * and degrade performance a lot. ehci_sync_mem
+                        * is added to flush 'token' immediately into
+                        * memory, so that ehci can execute the transaction
+                        * ASAP.
+                        * */
+                       ehci_sync_mem();
+
                        urb->hcpriv = qh_get (qh);
                }
        }
index 3c3503acae5816dda633a58eaa3f98d70f533b17..1a35675facefad05d37b54f7d4f841a5f067db27 100644 (file)
@@ -769,6 +769,23 @@ static inline unsigned ehci_read_frame_index(struct ehci_hcd *ehci)
 
 #endif
 
+/*
+ * Writes to dma coherent memory on ARM may be delayed in the L2 write
+ * buffer, so provide a helper that issues mb() to flush the L2 write
+ * buffer into memory immediately; it is used especially to flush ehci
+ * descriptors.  It is a no-op without CONFIG_ARM_DMA_MEM_BUFFERABLE.
+ */
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+static inline void ehci_sync_mem(void)
+{
+       mb();
+}
+#else
+static inline void ehci_sync_mem(void)
+{
+}
+#endif
+
 /*-------------------------------------------------------------------------*/
 
 #ifndef DEBUG