]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/net/ethernet/sfc/tx.c
Merge remote-tracking branch 'wireless-next/master'
[karo-tx-linux.git] / drivers / net / ethernet / sfc / tx.c
index a23ba0dbdd1d72eb6df65f62d1c955bba961d716..282692c48e6b6de94949e814ecdadf848186edc6 100644 (file)
 #include <net/ipv6.h>
 #include <linux/if_ether.h>
 #include <linux/highmem.h>
+#include <linux/cache.h>
 #include "net_driver.h"
 #include "efx.h"
+#include "io.h"
 #include "nic.h"
 #include "workarounds.h"
 #include "ef10_regs.h"
 
+#ifdef EFX_USE_PIO
+
+#define EFX_PIOBUF_SIZE_MAX ER_DZ_TX_PIOBUF_SIZE
+#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
+unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;
+
+#endif /* EFX_USE_PIO */
+
+static inline unsigned int
+efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+{
+       return tx_queue->insert_count & tx_queue->ptr_mask;
+}
+
+static inline struct efx_tx_buffer *
+__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+       return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+}
+
+static inline struct efx_tx_buffer *
+efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+       struct efx_tx_buffer *buffer =
+               __efx_tx_queue_get_insert_buffer(tx_queue);
+
+       EFX_BUG_ON_PARANOID(buffer->len);
+       EFX_BUG_ON_PARANOID(buffer->flags);
+       EFX_BUG_ON_PARANOID(buffer->unmap_len);
+
+       return buffer;
+}
+
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                               struct efx_tx_buffer *buffer,
                               unsigned int *pkts_compl,
@@ -148,6 +183,145 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
        }
 }
 
+#ifdef EFX_USE_PIO
+
+struct efx_short_copy_buffer {
+       int used;
+       u8 buf[L1_CACHE_BYTES];
+};
+
+/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
+                                   u8 *data, int len,
+                                   struct efx_short_copy_buffer *copy_buf)
+{
+       int block_len = len & ~(sizeof(copy_buf->buf) - 1);
+
+       memcpy_toio(*piobuf, data, block_len);
+       *piobuf += block_len;
+       len -= block_len;
+
+       if (len) {
+               data += block_len;
+               BUG_ON(copy_buf->used);
+               BUG_ON(len > sizeof(copy_buf->buf));
+               memcpy(copy_buf->buf, data, len);
+               copy_buf->used = len;
+       }
+}
+
+/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
+                                      u8 *data, int len,
+                                      struct efx_short_copy_buffer *copy_buf)
+{
+       if (copy_buf->used) {
+               /* if the copy buffer is partially full, fill it up and write */
+               int copy_to_buf =
+                       min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
+
+               memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
+               copy_buf->used += copy_to_buf;
+
+               /* if we didn't fill it up then we're done for now */
+               if (copy_buf->used < sizeof(copy_buf->buf))
+                       return;
+
+               memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+               *piobuf += sizeof(copy_buf->buf);
+               data += copy_to_buf;
+               len -= copy_to_buf;
+               copy_buf->used = 0;
+       }
+
+       efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
+}
+
+static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
+                                 struct efx_short_copy_buffer *copy_buf)
+{
+       /* if there's anything in it, write the whole buffer, including junk */
+       if (copy_buf->used)
+               memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+}
+
+/* Traverse skb structure and copy fragments in to PIO buffer.
+ * Advances piobuf pointer.
+ */
+static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
+                                    u8 __iomem **piobuf,
+                                    struct efx_short_copy_buffer *copy_buf)
+{
+       int i;
+
+       efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
+                               copy_buf);
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+               u8 *vaddr;
+
+               vaddr = kmap_atomic(skb_frag_page(f));
+
+               efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
+                                          skb_frag_size(f), copy_buf);
+               kunmap_atomic(vaddr);
+       }
+
+       EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
+}
+
+static struct efx_tx_buffer *
+efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+       struct efx_tx_buffer *buffer =
+               efx_tx_queue_get_insert_buffer(tx_queue);
+       u8 __iomem *piobuf = tx_queue->piobuf;
+
+       /* Copy to PIO buffer. Ensure the writes are padded to the end
+        * of a cache line, as this is required for write-combining to be
+        * effective on at least x86.
+        */
+
+       if (skb_shinfo(skb)->nr_frags) {
+               /* The size of the copy buffer will ensure all writes
+                * are the size of a cache line.
+                */
+               struct efx_short_copy_buffer copy_buf;
+
+               copy_buf.used = 0;
+
+               efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
+                                        &piobuf, &copy_buf);
+               efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
+       } else {
+               /* Pad the write to the size of a cache line.
+                * We can do this because we know the skb_shared_info sruct is
+                * after the source, and the destination buffer is big enough.
+                */
+               BUILD_BUG_ON(L1_CACHE_BYTES >
+                            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+               memcpy_toio(tx_queue->piobuf, skb->data,
+                           ALIGN(skb->len, L1_CACHE_BYTES));
+       }
+
+       EFX_POPULATE_QWORD_5(buffer->option,
+                            ESF_DZ_TX_DESC_IS_OPT, 1,
+                            ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
+                            ESF_DZ_TX_PIO_CONT, 0,
+                            ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
+                            ESF_DZ_TX_PIO_BUF_ADDR,
+                            tx_queue->piobuf_offset);
+       ++tx_queue->pio_packets;
+       ++tx_queue->insert_count;
+       return buffer;
+}
+#endif /* EFX_USE_PIO */
+
 /*
  * Add a socket buffer to a TX queue
  *
@@ -170,7 +344,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
        struct device *dma_dev = &efx->pci_dev->dev;
        struct efx_tx_buffer *buffer;
        skb_frag_t *fragment;
-       unsigned int len, unmap_len = 0, insert_ptr;
+       unsigned int len, unmap_len = 0;
        dma_addr_t dma_addr, unmap_addr = 0;
        unsigned int dma_len;
        unsigned short dma_flags;
@@ -192,6 +366,17 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                        return NETDEV_TX_OK;
        }
 
+       /* Consider using PIO for short packets */
+#ifdef EFX_USE_PIO
+       if (skb->len <= efx_piobuf_size && tx_queue->piobuf &&
+           efx_nic_tx_is_empty(tx_queue) &&
+           efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) {
+               buffer = efx_enqueue_skb_pio(tx_queue, skb);
+               dma_flags = EFX_TX_BUF_OPTION;
+               goto finish_packet;
+       }
+#endif
+
        /* Map for DMA.  Use dma_map_single rather than dma_map_page
         * since this is more efficient on machines with sparse
         * memory.
@@ -211,11 +396,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 
                /* Add to TX queue, splitting across DMA boundaries */
                do {
-                       insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
-                       buffer = &tx_queue->buffer[insert_ptr];
-                       EFX_BUG_ON_PARANOID(buffer->flags);
-                       EFX_BUG_ON_PARANOID(buffer->len);
-                       EFX_BUG_ON_PARANOID(buffer->unmap_len);
+                       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
 
                        dma_len = efx_max_tx_len(efx, dma_addr);
                        if (likely(dma_len >= len))
@@ -248,6 +429,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
        }
 
        /* Transfer ownership of the skb to the final buffer */
+finish_packet:
        buffer->skb = skb;
        buffer->flags = EFX_TX_BUF_SKB | dma_flags;
 
@@ -273,8 +455,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
        while (tx_queue->insert_count != tx_queue->write_count) {
                unsigned int pkts_compl = 0, bytes_compl = 0;
                --tx_queue->insert_count;
-               insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
-               buffer = &tx_queue->buffer[insert_ptr];
+               buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
                efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
        }
 
@@ -745,23 +926,18 @@ static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 {
        struct efx_tx_buffer *buffer;
        struct efx_nic *efx = tx_queue->efx;
-       unsigned dma_len, insert_ptr;
+       unsigned dma_len;
 
        EFX_BUG_ON_PARANOID(len <= 0);
 
        while (1) {
-               insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
-               buffer = &tx_queue->buffer[insert_ptr];
+               buffer = efx_tx_queue_get_insert_buffer(tx_queue);
                ++tx_queue->insert_count;
 
                EFX_BUG_ON_PARANOID(tx_queue->insert_count -
                                    tx_queue->read_count >=
                                    efx->txq_entries);
 
-               EFX_BUG_ON_PARANOID(buffer->len);
-               EFX_BUG_ON_PARANOID(buffer->unmap_len);
-               EFX_BUG_ON_PARANOID(buffer->flags);
-
                buffer->dma_addr = dma_addr;
 
                dma_len = efx_max_tx_len(efx, dma_addr);
@@ -822,8 +998,7 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
-               buffer = &tx_queue->buffer[tx_queue->insert_count &
-                                          tx_queue->ptr_mask];
+               buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
                efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
        }
 }
@@ -968,7 +1143,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
                                struct tso_state *st)
 {
        struct efx_tx_buffer *buffer =
-               &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
+               efx_tx_queue_get_insert_buffer(tx_queue);
        bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
        u8 tcp_flags_clear;
 
@@ -1038,8 +1213,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
                /* We mapped the headers in tso_start().  Unmap them
                 * when the last segment is completed.
                 */
-               buffer = &tx_queue->buffer[tx_queue->insert_count &
-                                          tx_queue->ptr_mask];
+               buffer = efx_tx_queue_get_insert_buffer(tx_queue);
                buffer->dma_addr = st->header_dma_addr;
                buffer->len = st->header_len;
                if (is_last) {