git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
net: stmmac: adding multiple napi mechanism
[karo-tx-linux.git] / drivers / net / ethernet / stmicro / stmmac / stmmac_main.c
index 4498a3861aa3ad09460e922bd7f38e3506889dcb..a89f76b27ea2b34ba33acb6f8f7cf394cde04204 100644 (file)
@@ -138,6 +138,64 @@ static void stmmac_verify_args(void)
                eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 }
 
+/**
+ * stmmac_disable_all_queues - Disable NAPI on all RX queues in use
+ * @priv: driver private structure
+ */
+static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_queues_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               napi_disable(&rx_q->napi);
+       }
+}
+
+/**
+ * stmmac_enable_all_queues - Enable NAPI on all RX queues in use
+ * @priv: driver private structure
+ */
+static void stmmac_enable_all_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_queues_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               napi_enable(&rx_q->napi);
+       }
+}
+
+/**
+ * stmmac_stop_all_queues - Stop all TX queues in use
+ * @priv: driver private structure
+ */
+static void stmmac_stop_all_queues(struct stmmac_priv *priv)
+{
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues_cnt; queue++)
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
+}
+
+/**
+ * stmmac_start_all_queues - Start all TX queues in use
+ * @priv: driver private structure
+ */
+static void stmmac_start_all_queues(struct stmmac_priv *priv)
+{
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues_cnt; queue++)
+               netif_tx_start_queue(netdev_get_tx_queue(priv->dev, queue));
+}
+
 /**
  * stmmac_clk_csr_set - dynamically set the MDC clock
  * @priv: driver private structure
@@ -185,26 +243,33 @@ static void print_pkt(unsigned char *buf, int len)
        print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, buf, len);
 }
 
-static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
+static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        u32 avail;
 
-       if (priv->dirty_tx > priv->cur_tx)
-               avail = priv->dirty_tx - priv->cur_tx - 1;
+       if (tx_q->dirty_tx > tx_q->cur_tx)
+               avail = tx_q->dirty_tx - tx_q->cur_tx - 1;
        else
-               avail = DMA_TX_SIZE - priv->cur_tx + priv->dirty_tx - 1;
+               avail = DMA_TX_SIZE - tx_q->cur_tx + tx_q->dirty_tx - 1;
 
        return avail;
 }
 
-static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv)
+/**
+ * stmmac_rx_dirty - Get the number of dirty entries of an RX queue
+ * @priv: driver private structure
+ * @queue: RX queue index
+ */
+static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        u32 dirty;
 
-       if (priv->dirty_rx <= priv->cur_rx)
-               dirty = priv->cur_rx - priv->dirty_rx;
+       if (rx_q->dirty_rx <= rx_q->cur_rx)
+               dirty = rx_q->cur_rx - rx_q->dirty_rx;
        else
-               dirty = DMA_RX_SIZE - priv->dirty_rx + priv->cur_rx;
+               dirty = DMA_RX_SIZE - rx_q->dirty_rx + rx_q->cur_rx;
 
        return dirty;
 }
@@ -232,9 +297,19 @@ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
  */
 static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
 {
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       /* check if all TX queues have the work finished */
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               if (tx_q->dirty_tx != tx_q->cur_tx)
+                       return; /* still unfinished work */
+       }
+
        /* Check and enter in LPI mode */
-       if ((priv->dirty_tx == priv->cur_tx) &&
-           (priv->tx_path_in_lpi_mode == false))
+       if (!priv->tx_path_in_lpi_mode)
                priv->hw->mac->set_eee_mode(priv->hw,
                                            priv->plat->en_tx_lpi_clockgating);
 }
@@ -672,6 +747,19 @@ static void stmmac_release_ptp(struct stmmac_priv *priv)
        stmmac_ptp_unregister(priv);
 }
 
+/**
+ *  stmmac_mac_flow_ctrl - Configure flow control in all queues
+ *  @priv: driver private structure
+ *  Description: It is used for configuring the flow control in all queues
+ */
+static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+
+       priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl,
+                                priv->pause, tx_cnt);
+}
+
 /**
  * stmmac_adjust_link - adjusts the link parameters
  * @dev: net device structure
@@ -687,7 +775,6 @@ static void stmmac_adjust_link(struct net_device *dev)
        struct phy_device *phydev = dev->phydev;
        unsigned long flags;
        int new_state = 0;
-       unsigned int fc = priv->flow_ctrl, pause_time = priv->pause;
 
        if (!phydev)
                return;
@@ -709,8 +796,7 @@ static void stmmac_adjust_link(struct net_device *dev)
                }
                /* Flow Control operation */
                if (phydev->pause)
-                       priv->hw->mac->flow_ctrl(priv->hw, phydev->duplex,
-                                                fc, pause_time);
+                       stmmac_mac_flow_ctrl(priv, phydev->duplex);
 
                if (phydev->speed != priv->speed) {
                        new_state = 1;
@@ -878,22 +964,56 @@ static int stmmac_init_phy(struct net_device *dev)
        return 0;
 }
 
-static void stmmac_display_rings(struct stmmac_priv *priv)
+static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 {
-       void *head_rx, *head_tx;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       void *head_rx;
+       u32 queue;
 
-       if (priv->extend_desc) {
-               head_rx = (void *)priv->dma_erx;
-               head_tx = (void *)priv->dma_etx;
-       } else {
-               head_rx = (void *)priv->dma_rx;
-               head_tx = (void *)priv->dma_tx;
+       /* Display RX rings */
+       for (queue = 0; queue < rx_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               pr_info("\tRX Queue %u rings\n", queue);
+
+               if (priv->extend_desc)
+                       head_rx = (void *)rx_q->dma_erx;
+               else
+                       head_rx = (void *)rx_q->dma_rx;
+
+               /* Display RX ring */
+               priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
        }
+}
 
-       /* Display Rx ring */
-       priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
-       /* Display Tx ring */
-       priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
+static void stmmac_display_tx_rings(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       void *head_tx;
+       u32 queue;
+
+       /* Display TX rings */
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               pr_info("\tTX Queue %d rings\n", queue);
+
+               if (priv->extend_desc)
+                       head_tx = (void *)tx_q->dma_etx;
+               else
+                       head_tx = (void *)tx_q->dma_tx;
+
+               priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
+       }
+}
+
+static void stmmac_display_rings(struct stmmac_priv *priv)
+{
+       /* Display RX ring */
+       stmmac_display_rx_rings(priv);
+
+       /* Display TX ring */
+       stmmac_display_tx_rings(priv);
 }
 
 static int stmmac_set_bfsize(int mtu, int bufsize)
@@ -913,48 +1033,88 @@ static int stmmac_set_bfsize(int mtu, int bufsize)
 }
 
 /**
- * stmmac_clear_descriptors - clear descriptors
+ * stmmac_clear_rx_descriptors - clear RX descriptors
  * @priv: driver private structure
- * Description: this function is called to clear the tx and rx descriptors
+ * @queue: RX queue index
+ * Description: this function is called to clear the RX descriptors
  * in case of both basic and extended descriptors are used.
  */
-static void stmmac_clear_descriptors(struct stmmac_priv *priv)
+static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        int i;
 
-       /* Clear the Rx/Tx descriptors */
+       /* Clear the RX descriptors */
        for (i = 0; i < DMA_RX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_rx_desc(&priv->dma_erx[i].basic,
+                       priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic,
                                                     priv->use_riwt, priv->mode,
                                                     (i == DMA_RX_SIZE - 1));
                else
-                       priv->hw->desc->init_rx_desc(&priv->dma_rx[i],
+                       priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i],
                                                     priv->use_riwt, priv->mode,
                                                     (i == DMA_RX_SIZE - 1));
+}
+
+/**
+ * stmmac_clear_tx_descriptors - clear tx descriptors
+ * @priv: driver private structure
+ * @queue: TX queue index.
+ * Description: this function is called to clear the TX descriptors
+ * in case of both basic and extended descriptors are used.
+ */
+static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       int i;
+
+       /* Clear the TX descriptors */
        for (i = 0; i < DMA_TX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
                else
-                       priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
 }
 
+/**
+ * stmmac_clear_descriptors - clear descriptors
+ * @priv: driver private structure
+ * Description: this function is called to clear the TX and RX descriptors
+ * in case of both basic and extended descriptors are used.
+ */
+static void stmmac_clear_descriptors(struct stmmac_priv *priv)
+{
+       u32 rx_queue_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       /* Clear the RX descriptors */
+       for (queue = 0; queue < rx_queue_cnt; queue++)
+               stmmac_clear_rx_descriptors(priv, queue);
+
+       /* Clear the TX descriptors */
+       for (queue = 0; queue < tx_queue_cnt; queue++)
+               stmmac_clear_tx_descriptors(priv, queue);
+}
+
 /**
  * stmmac_init_rx_buffers - init the RX descriptor buffer.
  * @priv: driver private structure
  * @p: descriptor pointer
  * @i: descriptor index
- * @flags: gfp flag.
+ * @flags: gfp flag
+ * @queue: RX queue index
  * Description: this function is called to allocate a receive buffer, perform
  * the DMA mapping and init the descriptor.
  */
 static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
-                                 int i, gfp_t flags)
+                                 int i, gfp_t flags, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        struct sk_buff *skb;
 
        skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags);
@@ -963,20 +1123,20 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
                           "%s: Rx init fails; skb is NULL\n", __func__);
                return -ENOMEM;
        }
-       priv->rx_skbuff[i] = skb;
-       priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
+       rx_q->rx_skbuff[i] = skb;
+       rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
                                                priv->dma_buf_sz,
                                                DMA_FROM_DEVICE);
-       if (dma_mapping_error(priv->device, priv->rx_skbuff_dma[i])) {
+       if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) {
                netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
                dev_kfree_skb_any(skb);
                return -EINVAL;
        }
 
        if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[i]);
+               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
        else
-               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[i]);
+               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
 
        if ((priv->hw->mode->init_desc3) &&
            (priv->dma_buf_sz == BUF_SIZE_16KiB))
@@ -985,30 +1145,71 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
        return 0;
 }
 
-static void stmmac_free_rx_buffers(struct stmmac_priv *priv, int i)
+/**
+ * stmmac_free_rx_buffer - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ * @i: buffer index.
+ */
+static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 {
-       if (priv->rx_skbuff[i]) {
-               dma_unmap_single(priv->device, priv->rx_skbuff_dma[i],
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+       if (rx_q->rx_skbuff[i]) {
+               dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i],
                                 priv->dma_buf_sz, DMA_FROM_DEVICE);
-               dev_kfree_skb_any(priv->rx_skbuff[i]);
+               dev_kfree_skb_any(rx_q->rx_skbuff[i]);
        }
-       priv->rx_skbuff[i] = NULL;
+       rx_q->rx_skbuff[i] = NULL;
 }
 
 /**
- * init_dma_desc_rings - init the RX/TX descriptor rings
+ * stmmac_free_tx_buffer - free TX dma buffers
+ * @priv: private structure
+ * @queue: TX queue index
+ * @i: buffer index.
+ */
+static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+       if (tx_q->tx_skbuff_dma[i].buf) {
+               if (tx_q->tx_skbuff_dma[i].map_as_page)
+                       dma_unmap_page(priv->device,
+                                      tx_q->tx_skbuff_dma[i].buf,
+                                      tx_q->tx_skbuff_dma[i].len,
+                                      DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(priv->device,
+                                        tx_q->tx_skbuff_dma[i].buf,
+                                        tx_q->tx_skbuff_dma[i].len,
+                                        DMA_TO_DEVICE);
+       }
+
+       if (tx_q->tx_skbuff[i]) {
+               dev_kfree_skb_any(tx_q->tx_skbuff[i]);
+               tx_q->tx_skbuff[i] = NULL;
+               tx_q->tx_skbuff_dma[i].buf = 0;
+               tx_q->tx_skbuff_dma[i].map_as_page = false;
+       }
+}
+
+/**
+ * init_dma_rx_desc_rings - init the RX descriptor rings
  * @dev: net device structure
  * @flags: gfp flag.
- * Description: this function initializes the DMA RX/TX descriptors
+ * Description: this function initializes the DMA RX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
+static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 {
-       int i;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_count = priv->plat->rx_queues_to_use;
        unsigned int bfsize = 0;
        int ret = -ENOMEM;
+       u32 queue;
+       int i;
 
        if (priv->hw->mode->set_16kib_bfsize)
                bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu);
@@ -1018,257 +1219,516 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 
        priv->dma_buf_sz = bfsize;
 
-       netif_dbg(priv, probe, priv->dev,
-                 "(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n",
-                 __func__, (u32)priv->dma_rx_phy, (u32)priv->dma_tx_phy);
-
        /* RX INITIALIZATION */
        netif_dbg(priv, probe, priv->dev,
                  "SKB addresses:\nskb\t\tskb data\tdma data\n");
 
-       for (i = 0; i < DMA_RX_SIZE; i++) {
-               struct dma_desc *p;
-               if (priv->extend_desc)
-                       p = &((priv->dma_erx + i)->basic);
-               else
-                       p = priv->dma_rx + i;
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               netif_dbg(priv, probe, priv->dev,
+                         "(%s) dma_rx_phy=0x%08x\n", __func__,
+                         (u32)rx_q->dma_rx_phy);
 
-               ret = stmmac_init_rx_buffers(priv, p, i, flags);
-               if (ret)
-                       goto err_init_rx_buffers;
+               for (i = 0; i < DMA_RX_SIZE; i++) {
+                       struct dma_desc *p;
 
-               netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
-                         priv->rx_skbuff[i], priv->rx_skbuff[i]->data,
-                         (unsigned int)priv->rx_skbuff_dma[i]);
+                       if (priv->extend_desc)
+                               p = &((rx_q->dma_erx + i)->basic);
+                       else
+                               p = rx_q->dma_rx + i;
+
+                       ret = stmmac_init_rx_buffers(priv, p, i, flags,
+                                                    queue);
+                       if (ret)
+                               goto err_init_rx_buffers;
+
+                       netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
+                                 rx_q->rx_skbuff[i], rx_q->rx_skbuff[i]->data,
+                                 (unsigned int)rx_q->rx_skbuff_dma[i]);
+               }
+
+               rx_q->cur_rx = 0;
+               rx_q->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
+
+               stmmac_clear_rx_descriptors(priv, queue);
+
+               /* Setup the chained descriptor addresses */
+               if (priv->mode == STMMAC_CHAIN_MODE) {
+                       if (priv->extend_desc)
+                               priv->hw->mode->init(rx_q->dma_erx,
+                                                    rx_q->dma_rx_phy,
+                                                    DMA_RX_SIZE, 1);
+                       else
+                               priv->hw->mode->init(rx_q->dma_rx,
+                                                    rx_q->dma_rx_phy,
+                                                    DMA_RX_SIZE, 0);
+               }
        }
-       priv->cur_rx = 0;
-       priv->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
+
        buf_sz = bfsize;
 
-       /* Setup the chained descriptor addresses */
-       if (priv->mode == STMMAC_CHAIN_MODE) {
-               if (priv->extend_desc) {
-                       priv->hw->mode->init(priv->dma_erx, priv->dma_rx_phy,
-                                            DMA_RX_SIZE, 1);
-                       priv->hw->mode->init(priv->dma_etx, priv->dma_tx_phy,
-                                            DMA_TX_SIZE, 1);
-               } else {
-                       priv->hw->mode->init(priv->dma_rx, priv->dma_rx_phy,
-                                            DMA_RX_SIZE, 0);
-                       priv->hw->mode->init(priv->dma_tx, priv->dma_tx_phy,
-                                            DMA_TX_SIZE, 0);
-               }
+       return 0;
+
+err_init_rx_buffers:
+       while (queue >= 0) {
+               while (--i >= 0)
+                       stmmac_free_rx_buffer(priv, queue, i);
+
+               if (queue == 0)
+                       break;
+
+               i = DMA_RX_SIZE;
+               queue--;
        }
 
-       /* TX INITIALIZATION */
-       for (i = 0; i < DMA_TX_SIZE; i++) {
-               struct dma_desc *p;
-               if (priv->extend_desc)
-                       p = &((priv->dma_etx + i)->basic);
-               else
-                       p = priv->dma_tx + i;
+       return ret;
+}
 
-               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-                       p->des0 = 0;
-                       p->des1 = 0;
-                       p->des2 = 0;
-                       p->des3 = 0;
-               } else {
-                       p->des2 = 0;
+/**
+ * init_dma_tx_desc_rings - init the TX descriptor rings
+ * @dev: net device structure.
+ * Description: this function initializes the DMA TX descriptors and
+ * resets the per-queue TX bookkeeping; no socket buffers are allocated
+ * here. It supports the chained and ring modes.
+ */
+static int init_dma_tx_desc_rings(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+       int i;
+
+       for (queue = 0; queue < tx_queue_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               netif_dbg(priv, probe, priv->dev,
+                         "(%s) dma_tx_phy=0x%08x\n", __func__,
+                        (u32)tx_q->dma_tx_phy);
+
+               /* Setup the chained descriptor addresses */
+               if (priv->mode == STMMAC_CHAIN_MODE) {
+                       if (priv->extend_desc)
+                               priv->hw->mode->init(tx_q->dma_etx,
+                                                    tx_q->dma_tx_phy,
+                                                    DMA_TX_SIZE, 1);
+                       else
+                               priv->hw->mode->init(tx_q->dma_tx,
+                                                    tx_q->dma_tx_phy,
+                                                    DMA_TX_SIZE, 0);
                }
 
-               priv->tx_skbuff_dma[i].buf = 0;
-               priv->tx_skbuff_dma[i].map_as_page = false;
-               priv->tx_skbuff_dma[i].len = 0;
-               priv->tx_skbuff_dma[i].last_segment = false;
-               priv->tx_skbuff[i] = NULL;
+               for (i = 0; i < DMA_TX_SIZE; i++) {
+                       struct dma_desc *p;
+                       if (priv->extend_desc)
+                               p = &((tx_q->dma_etx + i)->basic);
+                       else
+                               p = tx_q->dma_tx + i;
+
+                       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+                               p->des0 = 0;
+                               p->des1 = 0;
+                               p->des2 = 0;
+                               p->des3 = 0;
+                       } else {
+                               p->des2 = 0;
+                       }
+
+                       tx_q->tx_skbuff_dma[i].buf = 0;
+                       tx_q->tx_skbuff_dma[i].map_as_page = false;
+                       tx_q->tx_skbuff_dma[i].len = 0;
+                       tx_q->tx_skbuff_dma[i].last_segment = false;
+                       tx_q->tx_skbuff[i] = NULL;
+               }
+
+               tx_q->dirty_tx = 0;
+               tx_q->cur_tx = 0;
+
+               netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
-       netdev_reset_queue(priv->dev);
+       return 0;
+}
+
+/**
+ * init_dma_desc_rings - init the RX/TX descriptor rings
+ * @dev: net device structure
+ * @flags: gfp flag.
+ * Description: this function initializes the DMA RX/TX descriptors
+ * and allocates the socket buffers. It supports the chained and ring
+ * modes.
+ */
+static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int ret;
+
+       ret = init_dma_rx_desc_rings(dev, flags);
+       if (ret)
+               return ret;
+
+       ret = init_dma_tx_desc_rings(dev);
 
        stmmac_clear_descriptors(priv);
 
        if (netif_msg_hw(priv))
                stmmac_display_rings(priv);
 
-       return 0;
-err_init_rx_buffers:
-       while (--i >= 0)
-               stmmac_free_rx_buffers(priv, i);
        return ret;
 }
 
-static void dma_free_rx_skbufs(struct stmmac_priv *priv)
+/**
+ * dma_free_rx_skbufs - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
        int i;
 
        for (i = 0; i < DMA_RX_SIZE; i++)
-               stmmac_free_rx_buffers(priv, i);
+               stmmac_free_rx_buffer(priv, queue, i);
 }
 
-static void dma_free_tx_skbufs(struct stmmac_priv *priv)
+/**
+ * dma_free_tx_skbufs - free TX dma buffers
+ * @priv: private structure
+ * @queue: TX queue index
+ */
+static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
        int i;
 
-       for (i = 0; i < DMA_TX_SIZE; i++) {
-               if (priv->tx_skbuff_dma[i].buf) {
-                       if (priv->tx_skbuff_dma[i].map_as_page)
-                               dma_unmap_page(priv->device,
-                                              priv->tx_skbuff_dma[i].buf,
-                                              priv->tx_skbuff_dma[i].len,
-                                              DMA_TO_DEVICE);
-                       else
-                               dma_unmap_single(priv->device,
-                                                priv->tx_skbuff_dma[i].buf,
-                                                priv->tx_skbuff_dma[i].len,
-                                                DMA_TO_DEVICE);
+       for (i = 0; i < DMA_TX_SIZE; i++)
+               stmmac_free_tx_buffer(priv, queue, i);
+}
+
+/**
+ * free_dma_rx_desc_resources - free RX dma desc resources
+ * @priv: private structure
+ */
+static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       /* Free RX queue resources */
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               /* Release the DMA RX socket buffers */
+               dma_free_rx_skbufs(priv, queue);
+
+               /* Free DMA regions of consistent memory previously allocated */
+               if (!priv->extend_desc)
+                       dma_free_coherent(priv->device,
+                                         DMA_RX_SIZE * sizeof(struct dma_desc),
+                                         rx_q->dma_rx, rx_q->dma_rx_phy);
+               else
+                       dma_free_coherent(priv->device, DMA_RX_SIZE *
+                                         sizeof(struct dma_extended_desc),
+                                         rx_q->dma_erx, rx_q->dma_rx_phy);
+
+               kfree(rx_q->rx_skbuff_dma);
+               kfree(rx_q->rx_skbuff);
+       }
+}
+
+/**
+ * free_dma_tx_desc_resources - free TX dma desc resources
+ * @priv: private structure
+ */
+static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue = 0;
+
+       /* Free TX queue resources */
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               /* Release the DMA TX socket buffers */
+               dma_free_tx_skbufs(priv, queue);
+
+               /* Free DMA regions of consistent memory previously allocated */
+               if (!priv->extend_desc)
+                       dma_free_coherent(priv->device,
+                                         DMA_TX_SIZE * sizeof(struct dma_desc),
+                                         tx_q->dma_tx, tx_q->dma_tx_phy);
+               else
+                       dma_free_coherent(priv->device, DMA_TX_SIZE *
+                                         sizeof(struct dma_extended_desc),
+                                         tx_q->dma_etx, tx_q->dma_tx_phy);
+
+               kfree(tx_q->tx_skbuff_dma);
+               kfree(tx_q->tx_skbuff);
+       }
+}
+
+/**
+ * alloc_dma_rx_desc_resources - alloc RX resources.
+ * @priv: private structure
+ * Description: according to which descriptor can be used (extend or basic)
+ * this function allocates the resources for the RX path. In case of
+ * reception, for example, it pre-allocates the RX socket buffer in order to
+ * allow zero-copy mechanism.
+ */
+static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       int ret = -ENOMEM;
+       u32 queue;
+
+       /* RX queues buffers and DMA */
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               rx_q->queue_index = queue;
+               rx_q->priv_data = priv;
+
+               rx_q->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE,
+                                                   sizeof(dma_addr_t),
+                                                   GFP_KERNEL);
+               if (!rx_q->rx_skbuff_dma)
+                       return -ENOMEM;
+
+               rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
+                                               sizeof(struct sk_buff *),
+                                               GFP_KERNEL);
+               if (!rx_q->rx_skbuff)
+                       goto err_dma;
+
+               if (priv->extend_desc) {
+                       rx_q->dma_erx = dma_zalloc_coherent(priv->device,
+                                                           DMA_RX_SIZE *
+                                                           sizeof(struct
+                                                           dma_extended_desc),
+                                                           &rx_q->dma_rx_phy,
+                                                           GFP_KERNEL);
+                       if (!rx_q->dma_erx)
+                               goto err_dma;
+
+               } else {
+                       rx_q->dma_rx = dma_zalloc_coherent(priv->device,
+                                                          DMA_RX_SIZE *
+                                                          sizeof(struct
+                                                          dma_desc),
+                                                          &rx_q->dma_rx_phy,
+                                                          GFP_KERNEL);
+                       if (!rx_q->dma_rx)
+                               goto err_dma;
                }
+       }
+
+       return 0;
+
+err_dma:
+       free_dma_rx_desc_resources(priv);
+
+       return ret;
+}
+
+/**
+ * alloc_dma_tx_desc_resources - alloc TX resources.
+ * @priv: private structure
+ * Description: according to which descriptor can be used (extend or basic)
+ * this function allocates the resources for the TX path: for every TX queue
+ * in use, the tx_skbuff and tx_skbuff_dma arrays and the coherent DMA
+ * descriptor ring.
+ */
+static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       int ret = -ENOMEM;
+       u32 queue;
+
+       /* TX queues buffers and DMA */
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               tx_q->queue_index = queue;
+               tx_q->priv_data = priv;
+
+               tx_q->tx_skbuff_dma = kmalloc_array(DMA_TX_SIZE,
+                                                   sizeof(*tx_q->tx_skbuff_dma),
+                                                   GFP_KERNEL);
+               if (!tx_q->tx_skbuff_dma)
+                       return -ENOMEM;
+
+               tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
+                                               sizeof(struct sk_buff *),
+                                               GFP_KERNEL);
+               if (!tx_q->tx_skbuff)
+                       goto err_dma_buffers;
+
+               if (priv->extend_desc) {
+                       tx_q->dma_etx = dma_zalloc_coherent(priv->device,
+                                                           DMA_TX_SIZE *
+                                                           sizeof(struct
+                                                           dma_extended_desc),
+                                                           &tx_q->dma_tx_phy,
+                                                           GFP_KERNEL);
+                       if (!tx_q->dma_etx)
+                               goto err_dma_buffers;
+               } else {
+                       tx_q->dma_tx = dma_zalloc_coherent(priv->device,
+                                                          DMA_TX_SIZE *
+                                                          sizeof(struct
+                                                                 dma_desc),
+                                                          &tx_q->dma_tx_phy,
+                                                          GFP_KERNEL);
+                       if (!tx_q->dma_tx)
+                               goto err_dma_buffers;
+               }
+       }
+
+       return 0;
+
+err_dma_buffers:
+       free_dma_tx_desc_resources(priv);
+
+       return ret;
+}
+
+/**
+ * alloc_dma_desc_resources - alloc TX/RX resources.
+ * @priv: private structure
+ * Description: allocates the RX descriptor resources first and, only if that
+ * succeeds, the TX descriptor resources, by calling
+ * alloc_dma_rx_desc_resources() and alloc_dma_tx_desc_resources().
+ * Return: 0 on success, otherwise the error code of the failing step.
+ * NOTE(review): RX resources are not released here when the TX step fails;
+ * presumably the caller is expected to clean up - confirm.
+ */
+static int alloc_dma_desc_resources(struct stmmac_priv *priv)
+{
+       int err;
+
+       /* RX Allocation */
+       err = alloc_dma_rx_desc_resources(priv);
+       if (err)
+               return err;
+
+       /* TX Allocation */
+       return alloc_dma_tx_desc_resources(priv);
+}
+
+/**
+ * free_dma_desc_resources - free dma desc resources
+ * @priv: private structure
+ * Description: releases the RX side and then the TX side by calling
+ * free_dma_rx_desc_resources() and free_dma_tx_desc_resources().
+ */
+static void free_dma_desc_resources(struct stmmac_priv *priv)
+{
+       /* Release the DMA RX socket buffers */
+       free_dma_rx_desc_resources(priv);
+
+       /* Release the DMA TX socket buffers */
+       free_dma_tx_desc_resources(priv);
+}
+
+/**
+ *  stmmac_mac_enable_rx_queues - Enable MAC rx queues
+ *  @priv: driver private structure
+ *  Description: It is used for enabling the rx queues in the MAC. Every RX
+ *  queue in use is enabled through the rx_queue_enable MAC callback, using
+ *  the mode_to_use value from that queue's platform configuration.
+ */
+static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u8 mode;
 
-               if (priv->tx_skbuff[i]) {
-                       dev_kfree_skb_any(priv->tx_skbuff[i]);
-                       priv->tx_skbuff[i] = NULL;
-                       priv->tx_skbuff_dma[i].buf = 0;
-                       priv->tx_skbuff_dma[i].map_as_page = false;
-               }
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               mode = priv->plat->rx_queues_cfg[queue].mode_to_use;
+               priv->hw->mac->rx_queue_enable(priv->hw, mode, queue);
         }
 }
 
 /**
- * alloc_dma_desc_resources - alloc TX/RX resources.
- * @priv: private structure
- * Description: according to which descriptor can be used (extend or basic)
- * this function allocates the resources for TX and RX paths. In case of
- * reception, for example, it pre-allocated the RX socket buffer in order to
- * allow zero-copy mechanism.
+ * stmmac_start_rx_dma - start RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * This starts a RX DMA channel
  */
-static int alloc_dma_desc_resources(struct stmmac_priv *priv)
+static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
 {
-       int ret = -ENOMEM;
-
-       priv->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE, sizeof(dma_addr_t),
-                                           GFP_KERNEL);
-       if (!priv->rx_skbuff_dma)
-               return -ENOMEM;
-
-       priv->rx_skbuff = kmalloc_array(DMA_RX_SIZE, sizeof(struct sk_buff *),
-                                       GFP_KERNEL);
-       if (!priv->rx_skbuff)
-               goto err_rx_skbuff;
-
-       priv->tx_skbuff_dma = kmalloc_array(DMA_TX_SIZE,
-                                           sizeof(*priv->tx_skbuff_dma),
-                                           GFP_KERNEL);
-       if (!priv->tx_skbuff_dma)
-               goto err_tx_skbuff_dma;
-
-       priv->tx_skbuff = kmalloc_array(DMA_TX_SIZE, sizeof(struct sk_buff *),
-                                       GFP_KERNEL);
-       if (!priv->tx_skbuff)
-               goto err_tx_skbuff;
-
-       if (priv->extend_desc) {
-               priv->dma_erx = dma_zalloc_coherent(priv->device, DMA_RX_SIZE *
-                                                   sizeof(struct
-                                                          dma_extended_desc),
-                                                   &priv->dma_rx_phy,
-                                                   GFP_KERNEL);
-               if (!priv->dma_erx)
-                       goto err_dma;
-
-               priv->dma_etx = dma_zalloc_coherent(priv->device, DMA_TX_SIZE *
-                                                   sizeof(struct
-                                                          dma_extended_desc),
-                                                   &priv->dma_tx_phy,
-                                                   GFP_KERNEL);
-               if (!priv->dma_etx) {
-                       dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                         sizeof(struct dma_extended_desc),
-                                         priv->dma_erx, priv->dma_rx_phy);
-                       goto err_dma;
-               }
-       } else {
-               priv->dma_rx = dma_zalloc_coherent(priv->device, DMA_RX_SIZE *
-                                                  sizeof(struct dma_desc),
-                                                  &priv->dma_rx_phy,
-                                                  GFP_KERNEL);
-               if (!priv->dma_rx)
-                       goto err_dma;
+       netdev_dbg(priv->dev, "DMA RX processes started in channel %d\n", chan);
+       priv->hw->dma->start_rx(priv->ioaddr, chan);
+}
 
-               priv->dma_tx = dma_zalloc_coherent(priv->device, DMA_TX_SIZE *
-                                                  sizeof(struct dma_desc),
-                                                  &priv->dma_tx_phy,
-                                                  GFP_KERNEL);
-               if (!priv->dma_tx) {
-                       dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                         sizeof(struct dma_desc),
-                                         priv->dma_rx, priv->dma_rx_phy);
-                       goto err_dma;
-               }
-       }
+/**
+ * stmmac_start_tx_dma - start TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * This starts a TX DMA channel: it emits a debug message and then invokes
+ * the start_tx callback of priv->hw->dma for @chan.
+ */
+static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       /* chan is a u32, so it is printed with %u */
+       netdev_dbg(priv->dev, "DMA TX processes started in channel %u\n", chan);
+       priv->hw->dma->start_tx(priv->ioaddr, chan);
+}
 
-       return 0;
+/**
+ * stmmac_stop_rx_dma - stop RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * This stops a RX DMA channel: it emits a debug message and then invokes
+ * the stop_rx callback of priv->hw->dma for @chan.
+ */
+static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       /* chan is a u32, so it is printed with %u */
+       netdev_dbg(priv->dev, "DMA RX processes stopped in channel %u\n", chan);
+       priv->hw->dma->stop_rx(priv->ioaddr, chan);
+}
 
-err_dma:
-       kfree(priv->tx_skbuff);
-err_tx_skbuff:
-       kfree(priv->tx_skbuff_dma);
-err_tx_skbuff_dma:
-       kfree(priv->rx_skbuff);
-err_rx_skbuff:
-       kfree(priv->rx_skbuff_dma);
-       return ret;
+/**
+ * stmmac_stop_tx_dma - stop TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * This stops a TX DMA channel: it emits a debug message and then invokes
+ * the stop_tx callback of priv->hw->dma for @chan.
+ */
+static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       /* chan is a u32, so it is printed with %u */
+       netdev_dbg(priv->dev, "DMA TX processes stopped in channel %u\n", chan);
+       priv->hw->dma->stop_tx(priv->ioaddr, chan);
 }
 
-static void free_dma_desc_resources(struct stmmac_priv *priv)
+/**
+ * stmmac_start_all_dma - start all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This starts all the RX and TX DMA channels; the channel counts are taken
+ * from rx_queues_to_use and tx_queues_to_use in the platform data.
+ */
+static void stmmac_start_all_dma(struct stmmac_priv *priv)
 {
-       /* Release the DMA TX/RX socket buffers */
-       dma_free_rx_skbufs(priv);
-       dma_free_tx_skbufs(priv);
-
-       /* Free DMA regions of consistent memory previously allocated */
-       if (!priv->extend_desc) {
-               dma_free_coherent(priv->device,
-                                 DMA_TX_SIZE * sizeof(struct dma_desc),
-                                 priv->dma_tx, priv->dma_tx_phy);
-               dma_free_coherent(priv->device,
-                                 DMA_RX_SIZE * sizeof(struct dma_desc),
-                                 priv->dma_rx, priv->dma_rx_phy);
-       } else {
-               dma_free_coherent(priv->device, DMA_TX_SIZE *
-                                 sizeof(struct dma_extended_desc),
-                                 priv->dma_etx, priv->dma_tx_phy);
-               dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                 sizeof(struct dma_extended_desc),
-                                 priv->dma_erx, priv->dma_rx_phy);
-       }
-       kfree(priv->rx_skbuff_dma);
-       kfree(priv->rx_skbuff);
-       kfree(priv->tx_skbuff_dma);
-       kfree(priv->tx_skbuff);
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
+
+       /* Start every RX channel in use ... */
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_start_rx_dma(priv, chan);
+
+       /* ... and then every TX channel in use */
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_start_tx_dma(priv, chan);
 }
 
 /**
- *  stmmac_mac_enable_rx_queues - Enable MAC rx queues
- *  @priv: driver private structure
- *  Description: It is used for enabling the rx queues in the MAC
+ * stmmac_stop_all_dma - stop all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This stops the RX and TX DMA channels; the channel counts are taken from
+ * rx_queues_to_use and tx_queues_to_use in the platform data.
  */
-static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
+static void stmmac_stop_all_dma(struct stmmac_priv *priv)
 {
-       int rx_count = priv->dma_cap.number_rx_queues;
-       int queue = 0;
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
 
-       /* If GMAC does not have multiple queues, then this is not necessary*/
-       if (rx_count == 1)
-               return;
+       /* Stop every RX channel in use ... */
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_stop_rx_dma(priv, chan);
 
-       /**
-        *  If the core is synthesized with multiple rx queues / multiple
-        *  dma channels, then rx queues will be disabled by default.
-        *  For now only rx queue 0 is enabled.
-        */
-       priv->hw->mac->rx_queue_enable(priv->hw, queue);
+       /* ... and then every TX channel in use */
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_stop_tx_dma(priv, chan);
 }
 
 /**
@@ -1279,11 +1739,20 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
  */
 static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
        int rxfifosz = priv->plat->rx_fifo_size;
+       u32 txmode = 0;
+       u32 rxmode = 0;
+       u32 chan = 0;
+
+       if (rxfifosz == 0)
+               rxfifosz = priv->dma_cap.rx_fifo_size;
 
-       if (priv->plat->force_thresh_dma_mode)
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc, rxfifosz);
-       else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
+       if (priv->plat->force_thresh_dma_mode) {
+               txmode = tc;
+               rxmode = tc;
+       } else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
                /*
                 * In case of GMAC, SF mode can be enabled
                 * to perform the TX COE in HW. This depends on:
@@ -1291,37 +1760,53 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
                 * 2) There is no bugged Jumbo frame support
                 *    that needs to not insert csum in the TDES.
                 */
-               priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE,
-                                       rxfifosz);
+               txmode = SF_DMA_MODE;
+               rxmode = SF_DMA_MODE;
                priv->xstats.threshold = SF_DMA_MODE;
-       } else
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE,
+       } else {
+               txmode = tc;
+               rxmode = SF_DMA_MODE;
+       }
+
+       /* configure all channels */
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               for (chan = 0; chan < rx_channels_count; chan++)
+                       priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
+                                                  rxfifosz);
+
+               for (chan = 0; chan < tx_channels_count; chan++)
+                       priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+       } else {
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
                                        rxfifosz);
+       }
 }
 
 /**
  * stmmac_tx_clean - to manage the transmission completion
  * @priv: driver private structure
+ * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv)
+static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        unsigned int bytes_compl = 0, pkts_compl = 0;
-       unsigned int entry = priv->dirty_tx;
+       unsigned int entry = tx_q->dirty_tx;
 
        netif_tx_lock(priv->dev);
 
        priv->xstats.tx_clean++;
 
-       while (entry != priv->cur_tx) {
-               struct sk_buff *skb = priv->tx_skbuff[entry];
+       while (entry != tx_q->cur_tx) {
+               struct sk_buff *skb = tx_q->tx_skbuff[entry];
                struct dma_desc *p;
                int status;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_etx + entry);
+                       p = (struct dma_desc *)(tx_q->dma_etx + entry);
                else
-                       p = priv->dma_tx + entry;
+                       p = tx_q->dma_tx + entry;
 
                status = priv->hw->desc->tx_status(&priv->dev->stats,
                                                      &priv->xstats, p,
@@ -1342,48 +1827,51 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
                        stmmac_get_tx_hwtstamp(priv, p, skb);
                }
 
-               if (likely(priv->tx_skbuff_dma[entry].buf)) {
-                       if (priv->tx_skbuff_dma[entry].map_as_page)
+               if (likely(tx_q->tx_skbuff_dma[entry].buf)) {
+                       if (tx_q->tx_skbuff_dma[entry].map_as_page)
                                dma_unmap_page(priv->device,
-                                              priv->tx_skbuff_dma[entry].buf,
-                                              priv->tx_skbuff_dma[entry].len,
+                                              tx_q->tx_skbuff_dma[entry].buf,
+                                              tx_q->tx_skbuff_dma[entry].len,
                                               DMA_TO_DEVICE);
                        else
                                dma_unmap_single(priv->device,
-                                                priv->tx_skbuff_dma[entry].buf,
-                                                priv->tx_skbuff_dma[entry].len,
+                                                tx_q->tx_skbuff_dma[entry].buf,
+                                                tx_q->tx_skbuff_dma[entry].len,
                                                 DMA_TO_DEVICE);
-                       priv->tx_skbuff_dma[entry].buf = 0;
-                       priv->tx_skbuff_dma[entry].len = 0;
-                       priv->tx_skbuff_dma[entry].map_as_page = false;
+                       tx_q->tx_skbuff_dma[entry].buf = 0;
+                       tx_q->tx_skbuff_dma[entry].len = 0;
+                       tx_q->tx_skbuff_dma[entry].map_as_page = false;
                }
 
                if (priv->hw->mode->clean_desc3)
-                       priv->hw->mode->clean_desc3(priv, p);
+                       priv->hw->mode->clean_desc3(tx_q, p);
 
-               priv->tx_skbuff_dma[entry].last_segment = false;
-               priv->tx_skbuff_dma[entry].is_jumbo = false;
+               tx_q->tx_skbuff_dma[entry].last_segment = false;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = false;
 
                if (likely(skb != NULL)) {
                        pkts_compl++;
                        bytes_compl += skb->len;
                        dev_consume_skb_any(skb);
-                       priv->tx_skbuff[entry] = NULL;
+                       tx_q->tx_skbuff[entry] = NULL;
                }
 
                priv->hw->desc->release_tx_desc(p, priv->mode);
 
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
        }
-       priv->dirty_tx = entry;
+       tx_q->dirty_tx = entry;
+
+       netdev_tx_completed_queue(netdev_get_tx_queue(priv->dev, queue),
+                                 pkts_compl, bytes_compl);
 
-       netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
+       if (unlikely(netif_tx_queue_stopped(netdev_get_tx_queue(priv->dev,
+                                                               queue))) &&
+           stmmac_tx_avail(priv, queue) > STMMAC_TX_THRESH) {
 
-       if (unlikely(netif_queue_stopped(priv->dev) &&
-           stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
                netif_dbg(priv, tx_done, priv->dev,
                          "%s: restart transmit\n", __func__);
-               netif_wake_queue(priv->dev);
+               netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
@@ -1393,45 +1881,76 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
        netif_tx_unlock(priv->dev);
 }
 
-static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
+/* Forwards to the enable_dma_irq callback of priv->hw->dma for @chan. */
+static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->enable_dma_irq(priv->ioaddr);
+       priv->hw->dma->enable_dma_irq(priv->ioaddr, chan);
 }
 
-static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv)
+/* Forwards to the disable_dma_irq callback of priv->hw->dma for @chan. */
+static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->disable_dma_irq(priv->ioaddr);
+       priv->hw->dma->disable_dma_irq(priv->ioaddr, chan);
 }
 
 /**
  * stmmac_tx_err - to manage the tx error
  * @priv: driver private structure
+ * @chan: channel index
  * Description: it cleans the descriptors and restarts the transmission
  * in case of transmission errors.
  */
-static void stmmac_tx_err(struct stmmac_priv *priv)
+static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
        int i;
-       netif_stop_queue(priv->dev);
 
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       dma_free_tx_skbufs(priv);
+       /* Stop only the netdev TX queue that maps to this channel */
+       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, chan));
+
+       /* Halt the channel's DMA, then drop the buffers still in its ring */
+       stmmac_stop_tx_dma(priv, chan);
+       dma_free_tx_skbufs(priv, chan);
+       /* Re-initialise each descriptor; the last argument is true only for
+        * the final entry of the ring.
+        */
        for (i = 0; i < DMA_TX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
                else
-                       priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
-       netdev_reset_queue(priv->dev);
-       priv->hw->dma->start_tx(priv->ioaddr);
+       /* Rewind the ring indexes, reset the netdev TX queue state and
+        * restart the channel's DMA.
+        */
+       tx_q->dirty_tx = 0;
+       tx_q->cur_tx = 0;
+       netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
+       stmmac_start_tx_dma(priv, chan);
 
        priv->dev->stats.tx_errors++;
-       netif_wake_queue(priv->dev);
+       netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, chan));
+}
+
+/**
+ *  stmmac_set_dma_operation_mode - Set DMA operation mode by channel
+ *  @priv: driver private structure
+ *  @txmode: TX operating mode
+ *  @rxmode: RX operating mode
+ *  @chan: channel index
+ *  Description: programs the tx/rx DMA thresholds or Store-And-Forward mode
+ *  at runtime. Cores from DWMAC_CORE_4_00 onwards are configured per channel
+ *  through dma_rx_mode/dma_tx_mode; older cores use the single dma_mode
+ *  callback, which does not take @chan.
+ */
+static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
+                                         u32 rxmode, u32 chan)
+{
+       int fifo_size = priv->plat->rx_fifo_size;
+
+       /* Fall back to the value in dma_cap when the platform gives none */
+       if (!fifo_size)
+               fifo_size = priv->dma_cap.rx_fifo_size;
+
+       if (priv->synopsys_id < DWMAC_CORE_4_00) {
+               /* One call programs both directions on these cores */
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
+                                       fifo_size);
+               return;
+       }
+
+       priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, fifo_size);
+       priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
 }
 
 /**
@@ -1443,31 +1962,43 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
  */
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
+       u32 tx_channel_count = priv->plat->tx_queues_to_use;
        int status;
-       int rxfifosz = priv->plat->rx_fifo_size;
+       u32 chan;
+
+       /* NOTE(review): the loop is bounded by the TX channel count but
+        * indexes priv->rx_queue[] and schedules that entry's NAPI; confirm
+        * this is correct when the RX and TX queue counts differ.
+        */
+       for (chan = 0; chan < tx_channel_count; chan++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
-       status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
-       if (likely((status & handle_rx)) || (status & handle_tx)) {
-               if (likely(napi_schedule_prep(&priv->napi))) {
-                       stmmac_disable_dma_irq(priv);
-                       __napi_schedule(&priv->napi);
+               status = priv->hw->dma->dma_interrupt(priv->ioaddr,
+                                                     &priv->xstats, chan);
+               /* RX or TX work reported: disable this channel's DMA irq and
+                * schedule NAPI.
+                */
+               if (likely((status & handle_rx)) || (status & handle_tx)) {
+                       if (likely(napi_schedule_prep(&rx_q->napi))) {
+                               stmmac_disable_dma_irq(priv, chan);
+                               __napi_schedule(&rx_q->napi);
+                       }
                }
-       }
-       if (unlikely(status & tx_hard_error_bump_tc)) {
-               /* Try to bump up the dma threshold on this failure */
-               if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
-                   (tc <= 256)) {
-                       tc += 64;
-                       if (priv->plat->force_thresh_dma_mode)
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc,
-                                                       rxfifosz);
-                       else
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc,
-                                                       SF_DMA_MODE, rxfifosz);
-                       priv->xstats.threshold = tc;
+
+               if (unlikely(status & tx_hard_error_bump_tc)) {
+                       /* Try to bump up the dma threshold on this failure */
+                       if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
+                           (tc <= 256)) {
+                               tc += 64;
+                               if (priv->plat->force_thresh_dma_mode)
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                     tc,
+                                                                     tc,
+                                                                     chan);
+                               else
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                   tc,
+                                                                   SF_DMA_MODE,
+                                                                   chan);
+                               priv->xstats.threshold = tc;
+                       }
+               } else if (unlikely(status == tx_hard_error)) {
+                       stmmac_tx_err(priv, chan);
                }
-       } else if (unlikely(status == tx_hard_error))
-               stmmac_tx_err(priv);
+       }
 }
 
 /**
@@ -1574,6 +2105,13 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
  */
 static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       struct stmmac_rx_queue *rx_q;
+       struct stmmac_tx_queue *tx_q;
+       u32 dummy_dma_rx_phy = 0;
+       u32 dummy_dma_tx_phy = 0;
+       u32 chan = 0;
        int atds = 0;
        int ret = 0;
 
@@ -1591,19 +2129,49 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
                return ret;
        }
 
-       priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
-                           priv->dma_tx_phy, priv->dma_rx_phy, atds);
-
        if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               priv->rx_tail_addr = priv->dma_rx_phy +
-                           (DMA_RX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, priv->rx_tail_addr,
-                                              STMMAC_CHAN0);
+               /* DMA Configuration */
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
+
+               /* DMA RX Channel Configuration */
+               for (chan = 0; chan < rx_channels_count; chan++) {
+                       rx_q = &priv->rx_queue[chan];
+
+                       priv->hw->dma->init_rx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   rx_q->dma_rx_phy, chan);
+
+                       rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+                                   (DMA_RX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                      rx_q->rx_tail_addr,
+                                                      chan);
+               }
+
+               /* DMA TX Channel Configuration */
+               for (chan = 0; chan < tx_channels_count; chan++) {
+                       tx_q = &priv->tx_queue[chan];
+
+                       priv->hw->dma->init_chan(priv->ioaddr,
+                                                priv->plat->dma_cfg,
+                                                chan);
 
-               priv->tx_tail_addr = priv->dma_tx_phy +
-                           (DMA_TX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                              STMMAC_CHAN0);
+                       priv->hw->dma->init_tx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   tx_q->dma_tx_phy, chan);
+
+                       tx_q->tx_tail_addr = tx_q->dma_tx_phy +
+                                   (DMA_TX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr,
+                                                      tx_q->tx_tail_addr,
+                                                      chan);
+               }
+       } else {
+               rx_q = &priv->rx_queue[chan];
+               tx_q = &priv->tx_queue[chan];
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
        }
 
        if (priv->plat->axi && priv->hw->dma->axi)
@@ -1621,8 +2189,12 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 static void stmmac_tx_timer(unsigned long data)
 {
+       /* data carries the stmmac_priv pointer as an unsigned long */
        struct stmmac_priv *priv = (struct stmmac_priv *)data;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 queue;
 
-       stmmac_tx_clean(priv);
+       /* let's scan all the tx queues */
+       for (queue = 0; queue < tx_queues_count; queue++)
+               stmmac_tx_clean(priv, queue);
 }
 
 /**
@@ -1644,6 +2216,196 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
        add_timer(&priv->txtimer);
 }
 
+/**
+ *  stmmac_set_rings_length - Set TX and RX rings length
+ *  @priv: driver private structure
+ *  Description: It programs the ring length of every TX and RX DMA channel
+ *  in use, through the optional set_tx_ring_len/set_rx_ring_len callbacks.
+ */
+static void stmmac_set_rings_length(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 ch;
+
+       /* a missing callback means that no ring of that kind is programmed */
+       if (!priv->hw->dma->set_tx_ring_len)
+               tx_cnt = 0;
+       if (!priv->hw->dma->set_rx_ring_len)
+               rx_cnt = 0;
+
+       /* TX rings are programmed before the RX ones */
+       for (ch = 0; ch < tx_cnt; ch++)
+               priv->hw->dma->set_tx_ring_len(priv->ioaddr,
+                                              DMA_TX_SIZE - 1, ch);
+
+       for (ch = 0; ch < rx_cnt; ch++)
+               priv->hw->dma->set_rx_ring_len(priv->ioaddr,
+                                              DMA_RX_SIZE - 1, ch);
+}
+
+/**
+ *  stmmac_set_tx_queue_weight - Set TX queue weight
+ *  @priv: driver private structure
+ *  Description: For every TX queue in use it programs the weight found in
+ *  the platform TX queue configuration. The caller has to check that the
+ *  set_mtl_tx_queue_weight callback is available.
+ */
+static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < tx_cnt; q++) {
+               u32 weight = priv->plat->tx_queues_cfg[q].weight;
+
+               priv->hw->mac->set_mtl_tx_queue_weight(priv->hw, weight, q);
+       }
+}
+
+/**
+ *  stmmac_configure_cbs - Configure CBS in TX queue
+ *  @priv: driver private structure
+ *  Description: It programs the CBS parameters (send slope, idle slope, high
+ *  credit and low credit) taken from the platform TX queue configuration.
+ *  Queue 0 and the queues whose mode is MTL_QUEUE_DCB are skipped.
+ */
+static void stmmac_configure_cbs(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       /* start from 1: queue 0 is reserved for legacy traffic */
+       for (q = 1; q < tx_cnt; q++) {
+               u32 mode = priv->plat->tx_queues_cfg[q].mode_to_use;
+
+               /* nothing is programmed for the queues working in DCB mode */
+               if (mode != MTL_QUEUE_DCB)
+                       priv->hw->mac->config_cbs(priv->hw,
+                                       priv->plat->tx_queues_cfg[q].send_slope,
+                                       priv->plat->tx_queues_cfg[q].idle_slope,
+                                       priv->plat->tx_queues_cfg[q].high_credit,
+                                       priv->plat->tx_queues_cfg[q].low_credit,
+                                       q);
+       }
+}
+
+/**
+ *  stmmac_rx_queue_dma_chan_map - Map RX queue to RX dma channel
+ *  @priv: driver private structure
+ *  Description: For every RX queue in use it maps the queue to the DMA
+ *  channel given by the platform RX queue configuration. The caller has to
+ *  check that the map_mtl_to_dma callback is available.
+ */
+static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
+{
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < rx_cnt; q++) {
+               u32 dma_chan = priv->plat->rx_queues_cfg[q].chan;
+
+               priv->hw->mac->map_mtl_to_dma(priv->hw, q, dma_chan);
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_prio - Configure RX Queue priority
+ *  @priv: driver private structure
+ *  Description: It programs the priority of each RX queue in use whose
+ *  platform configuration has use_prio set; the other queues are skipped.
+ */
+static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < rx_cnt; q++) {
+               u32 prio;
+
+               /* only the queues flagged with use_prio get a priority */
+               if (priv->plat->rx_queues_cfg[q].use_prio) {
+                       prio = priv->plat->rx_queues_cfg[q].prio;
+                       priv->hw->mac->rx_queue_prio(priv->hw, prio, q);
+               }
+       }
+}
+
+/**
+ *  stmmac_mac_config_tx_queues_prio - Configure TX Queue priority
+ *  @priv: driver private structure
+ *  Description: It programs the priority of each TX queue in use whose
+ *  platform configuration has use_prio set; the other queues are skipped.
+ */
+static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < tx_cnt; q++) {
+               u32 prio;
+
+               /* only the queues flagged with use_prio get a priority */
+               if (priv->plat->tx_queues_cfg[q].use_prio) {
+                       prio = priv->plat->tx_queues_cfg[q].prio;
+                       priv->hw->mac->tx_queue_prio(priv->hw, prio, q);
+               }
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_routing - Configure RX Queue Routing
+ *  @priv: driver private structure
+ *  Description: It programs, for each RX queue in use, the packet type to be
+ *  routed to it. The caller has to check that the rx_queue_routing callback
+ *  is available.
+ */
+static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u8 packet;
+
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               /* no specific packet type routing specified for the queue */
+               if (priv->plat->rx_queues_cfg[queue].pkt_route == 0x0)
+                       continue;
+
+               packet = priv->plat->rx_queues_cfg[queue].pkt_route;
+               /* routing callback, not rx_queue_prio: packet is a route */
+               priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
+       }
+}
+
+/**
+ *  stmmac_mtl_configuration - Configure MTL
+ *  @priv: driver private structure
+ *  Description: It is used for configuring MTL. TX queue weights, RX/TX
+ *  scheduling algorithms, CBS, RX/TX priorities and RX routing are programmed
+ *  only when more than one queue of the related direction is in use. The RX
+ *  queue to DMA channel mapping and the RX queue enabling do not depend on the
+ *  number of queues. Every step is skipped when the MAC core does not provide
+ *  the matching callback.
+ */
+static void stmmac_mtl_configuration(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+
+       if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight)
+               stmmac_set_tx_queue_weight(priv);
+
+       /* Configure MTL RX algorithms */
+       if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms)
+               priv->hw->mac->prog_mtl_rx_algorithms(priv->hw,
+                                               priv->plat->rx_sched_algorithm);
+
+       /* Configure MTL TX algorithms */
+       if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms)
+               priv->hw->mac->prog_mtl_tx_algorithms(priv->hw,
+                                               priv->plat->tx_sched_algorithm);
+
+       /* Configure CBS in AVB TX queues */
+       if (tx_queues_count > 1 && priv->hw->mac->config_cbs)
+               stmmac_configure_cbs(priv);
+
+       /* Map RX MTL to DMA channels */
+       if (priv->hw->mac->map_mtl_to_dma)
+               stmmac_rx_queue_dma_chan_map(priv);
+
+       /* Enable MAC RX Queues */
+       if (priv->hw->mac->rx_queue_enable)
+               stmmac_mac_enable_rx_queues(priv);
+
+       /* Set RX priorities */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio)
+               stmmac_mac_config_rx_queues_prio(priv);
+
+       /* Set TX priorities */
+       if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio)
+               stmmac_mac_config_tx_queues_prio(priv);
+
+       /* Set RX routing */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing)
+               stmmac_mac_config_rx_queues_routing(priv);
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -1659,6 +2421,9 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 chan;
        int ret;
 
        /* DMA initialization and SW reset */
@@ -1688,9 +2453,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        /* Initialize the MAC Core */
        priv->hw->mac->core_init(priv->hw, dev->mtu);
 
-       /* Initialize MAC RX Queues */
-       if (priv->hw->mac->rx_queue_enable)
-               stmmac_mac_enable_rx_queues(priv);
+       /* Initialize MTL */
+       if (priv->synopsys_id >= DWMAC_CORE_4_00)
+               stmmac_mtl_configuration(priv);
 
        ret = priv->hw->mac->rx_ipc(priv->hw);
        if (!ret) {
@@ -1700,10 +2465,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        }
 
        /* Enable the MAC Rx/Tx */
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               stmmac_dwmac4_set_mac(priv->ioaddr, true);
-       else
-               stmmac_set_mac(priv->ioaddr, true);
+       priv->hw->mac->set_mac(priv->ioaddr, true);
 
        /* Set the HW DMA mode and the COE */
        stmmac_dma_operation_mode(priv);
@@ -1711,6 +2473,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        stmmac_mmc_setup(priv);
 
        if (init_ptp) {
+               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+               if (ret < 0)
+                       netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
+
                ret = stmmac_init_ptp(priv);
                if (ret == -EOPNOTSUPP)
                        netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -1725,35 +2491,37 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
                            __func__);
 #endif
        /* Start the ball rolling... */
-       netdev_dbg(priv->dev, "DMA RX/TX processes started...\n");
-       priv->hw->dma->start_tx(priv->ioaddr);
-       priv->hw->dma->start_rx(priv->ioaddr);
+       stmmac_start_all_dma(priv);
 
        priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
        if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
                priv->rx_riwt = MAX_DMA_RIWT;
-               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT);
+               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
        }
 
        if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
                priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 
-       /*  set TX ring length */
-       if (priv->hw->dma->set_tx_ring_len)
-               priv->hw->dma->set_tx_ring_len(priv->ioaddr,
-                                              (DMA_TX_SIZE - 1));
-       /*  set RX ring length */
-       if (priv->hw->dma->set_rx_ring_len)
-               priv->hw->dma->set_rx_ring_len(priv->ioaddr,
-                                              (DMA_RX_SIZE - 1));
+       /* set TX and RX rings length */
+       stmmac_set_rings_length(priv);
+
        /* Enable TSO */
-       if (priv->tso)
-               priv->hw->dma->enable_tso(priv->ioaddr, 1, STMMAC_CHAN0);
+       if (priv->tso) {
+               for (chan = 0; chan < tx_cnt; chan++)
+                       priv->hw->dma->enable_tso(priv->ioaddr, 1, chan);
+       }
 
        return 0;
 }
 
+/**
+ * stmmac_hw_teardown - release the PTP reference clock
+ *  @dev : pointer to the device structure.
+ *  Description:
+ *  it disables and unprepares the PTP reference clock (plat->clk_ptp_ref).
+ */
+static void stmmac_hw_teardown(struct net_device *dev)
+{
+       struct stmmac_priv *priv;
+
+       priv = netdev_priv(dev);
+       clk_disable_unprepare(priv->plat->clk_ptp_ref);
+}
+
 /**
  *  stmmac_open - open entry point of the driver
  *  @dev : pointer to the device structure.
@@ -1821,7 +2589,7 @@ static int stmmac_open(struct net_device *dev)
                netdev_err(priv->dev,
                           "%s: ERROR: allocating the IRQ %d (error: %d)\n",
                           __func__, dev->irq, ret);
-               goto init_error;
+               goto irq_error;
        }
 
        /* Request the Wake IRQ in case of another line is used for WoL */
@@ -1848,8 +2616,8 @@ static int stmmac_open(struct net_device *dev)
                }
        }
 
-       napi_enable(&priv->napi);
-       netif_start_queue(dev);
+       stmmac_enable_all_queues(priv);
+       stmmac_start_all_queues(priv);
 
        return 0;
 
@@ -1858,7 +2626,12 @@ lpiirq_error:
                free_irq(priv->wol_irq, dev);
 wolirq_error:
        free_irq(dev->irq, dev);
+irq_error:
+       if (dev->phydev)
+               phy_stop(dev->phydev);
 
+       del_timer_sync(&priv->txtimer);
+       stmmac_hw_teardown(dev);
 init_error:
        free_dma_desc_resources(priv);
 dma_desc_error:
@@ -1887,9 +2660,9 @@ static int stmmac_release(struct net_device *dev)
                phy_disconnect(dev->phydev);
        }
 
-       netif_stop_queue(dev);
+       stmmac_stop_all_queues(priv);
 
-       napi_disable(&priv->napi);
+       stmmac_disable_all_queues(priv);
 
        del_timer_sync(&priv->txtimer);
 
@@ -1901,14 +2674,13 @@ static int stmmac_release(struct net_device *dev)
                free_irq(priv->lpi_irq, dev);
 
        /* Stop TX/RX DMA and clear the descriptors */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Release and free the Rx/Tx resources */
        free_dma_desc_resources(priv);
 
        /* Disable the MAC Rx/Tx */
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
 
        netif_carrier_off(dev);
 
@@ -1927,22 +2699,24 @@ static int stmmac_release(struct net_device *dev)
  *  @des: buffer start address
  *  @total_len: total length to fill in descriptors
  *  @last_segmant: condition for the last descriptor
+ *  @queue: TX queue index
  *  Description:
  *  This function fills descriptor and request new descriptors according to
  *  buffer length to fill
  */
 static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
-                                int total_len, bool last_segment)
+                                int total_len, bool last_segment, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        struct dma_desc *desc;
-       int tmp_len;
        u32 buff_size;
+       int tmp_len;
 
        tmp_len = total_len;
 
        while (tmp_len > 0) {
-               priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
-               desc = priv->dma_tx + priv->cur_tx;
+               tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+               desc = tx_q->dma_tx + tx_q->cur_tx;
 
                desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
                buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
@@ -1986,23 +2760,28 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
  */
 static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       u32 pay_len, mss;
-       int tmp_pay_len = 0;
+       struct dma_desc *desc, *first, *mss_desc = NULL;
        struct stmmac_priv *priv = netdev_priv(dev);
        int nfrags = skb_shinfo(skb)->nr_frags;
+       u32 queue = skb_get_queue_mapping(skb);
        unsigned int first_entry, des;
-       struct dma_desc *desc, *first, *mss_desc = NULL;
+       struct stmmac_tx_queue *tx_q;
+       int tmp_pay_len = 0;
+       u32 pay_len, mss;
        u8 proto_hdr_len;
        int i;
 
+       tx_q = &priv->tx_queue[queue];
+
        /* Compute header lengths */
        proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
        /* Desc availability based on threshold should be enough safe */
-       if (unlikely(stmmac_tx_avail(priv) <
+       if (unlikely(stmmac_tx_avail(priv, queue) <
                (((skb->len - proto_hdr_len) / TSO_MAX_BUFF_SIZE + 1)))) {
-               if (!netif_queue_stopped(dev)) {
-                       netif_stop_queue(dev);
+               if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
+                                                               queue));
                        /* This is a hard error, log it. */
                        netdev_err(priv->dev,
                                   "%s: Tx Ring full when queue awake\n",
@@ -2017,10 +2796,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* set new MSS value if needed */
        if (mss != priv->mss) {
-               mss_desc = priv->dma_tx + priv->cur_tx;
+               mss_desc = tx_q->dma_tx + tx_q->cur_tx;
                priv->hw->desc->set_mss(mss_desc, mss);
                priv->mss = mss;
-               priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
+               tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
        }
 
        if (netif_msg_tx_queued(priv)) {
@@ -2030,9 +2809,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                        skb->data_len);
        }
 
-       first_entry = priv->cur_tx;
+       first_entry = tx_q->cur_tx;
 
-       desc = priv->dma_tx + first_entry;
+       desc = tx_q->dma_tx + first_entry;
        first = desc;
 
        /* first descriptor: fill Headers on Buf1 */
@@ -2041,9 +2820,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        if (dma_mapping_error(priv->device, des))
                goto dma_map_err;
 
-       priv->tx_skbuff_dma[first_entry].buf = des;
-       priv->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-       priv->tx_skbuff[first_entry] = skb;
+       tx_q->tx_skbuff_dma[first_entry].buf = des;
+       tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
+       tx_q->tx_skbuff[first_entry] = skb;
 
        first->des0 = cpu_to_le32(des);
 
@@ -2054,7 +2833,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        /* If needed take extra descriptors to fill the remaining payload */
        tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
 
-       stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0));
+       stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
 
        /* Prepare fragments */
        for (i = 0; i < nfrags; i++) {
@@ -2063,24 +2842,26 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                des = skb_frag_dma_map(priv->device, frag, 0,
                                       skb_frag_size(frag),
                                       DMA_TO_DEVICE);
+               if (dma_mapping_error(priv->device, des))
+                       goto dma_map_err;
 
                stmmac_tso_allocator(priv, des, skb_frag_size(frag),
-                                    (i == nfrags - 1));
+                                    (i == nfrags - 1), queue);
 
-               priv->tx_skbuff_dma[priv->cur_tx].buf = des;
-               priv->tx_skbuff_dma[priv->cur_tx].len = skb_frag_size(frag);
-               priv->tx_skbuff[priv->cur_tx] = NULL;
-               priv->tx_skbuff_dma[priv->cur_tx].map_as_page = true;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
+               tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
        }
 
-       priv->tx_skbuff_dma[priv->cur_tx].last_segment = true;
+       tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
-       priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
+       tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 
-       if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
+       if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
                netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
                          __func__);
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        dev->stats.tx_bytes += skb->len;
@@ -2112,7 +2893,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        priv->hw->desc->prepare_tso_tx_desc(first, 1,
                        proto_hdr_len,
                        pay_len,
-                       1, priv->tx_skbuff_dma[first_entry].last_segment,
+                       1, tx_q->tx_skbuff_dma[first_entry].last_segment,
                        tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
        /* If context desc is used to change MSS */
@@ -2127,20 +2908,20 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
-                       __func__, priv->cur_tx, priv->dirty_tx, first_entry,
-                       priv->cur_tx, first, nfrags);
+                       __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
+                       tx_q->cur_tx, first, nfrags);
 
-               priv->hw->desc->display_ring((void *)priv->dma_tx, DMA_TX_SIZE,
+               priv->hw->desc->display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE,
                                             0);
 
                pr_info(">>> frame to be transmitted: ");
                print_pkt(skb->data, skb_headlen(skb));
        }
 
-       netdev_sent_queue(dev, skb->len);
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                      STMMAC_CHAN0);
+       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
+                                      queue);
 
        return NETDEV_TX_OK;
 
@@ -2164,21 +2945,26 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
        struct stmmac_priv *priv = netdev_priv(dev);
        unsigned int nopaged_len = skb_headlen(skb);
        int i, csum_insertion = 0, is_jumbo = 0;
+       u32 queue = skb_get_queue_mapping(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
        unsigned int entry, first_entry;
        struct dma_desc *desc, *first;
+       struct stmmac_tx_queue *tx_q;
        unsigned int enh_desc;
        unsigned int des;
 
+       tx_q = &priv->tx_queue[queue];
+
        /* Manage oversized TCP frames for GMAC4 device */
        if (skb_is_gso(skb) && priv->tso) {
                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                        return stmmac_tso_xmit(skb, dev);
        }
 
-       if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
-               if (!netif_queue_stopped(dev)) {
-                       netif_stop_queue(dev);
+       if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
+               if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
+                                                               queue));
                        /* This is a hard error, log it. */
                        netdev_err(priv->dev,
                                   "%s: Tx Ring full when queue awake\n",
@@ -2190,19 +2976,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
        if (priv->tx_path_in_lpi_mode)
                stmmac_disable_eee_mode(priv);
 
-       entry = priv->cur_tx;
+       entry = tx_q->cur_tx;
        first_entry = entry;
 
        csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
        if (likely(priv->extend_desc))
-               desc = (struct dma_desc *)(priv->dma_etx + entry);
+               desc = (struct dma_desc *)(tx_q->dma_etx + entry);
        else
-               desc = priv->dma_tx + entry;
+               desc = tx_q->dma_tx + entry;
 
        first = desc;
 
-       priv->tx_skbuff[first_entry] = skb;
+       tx_q->tx_skbuff[first_entry] = skb;
 
        enh_desc = priv->plat->enh_desc;
        /* To program the descriptors according to the size of the frame */
@@ -2211,7 +2997,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (unlikely(is_jumbo) && likely(priv->synopsys_id <
                                         DWMAC_CORE_4_00)) {
-               entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion);
+               entry = priv->hw->mode->jumbo_frm(tx_q, skb, csum_insertion);
                if (unlikely(entry < 0))
                        goto dma_map_err;
        }
@@ -2224,26 +3010,26 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
                if (likely(priv->extend_desc))
-                       desc = (struct dma_desc *)(priv->dma_etx + entry);
+                       desc = (struct dma_desc *)(tx_q->dma_etx + entry);
                else
-                       desc = priv->dma_tx + entry;
+                       desc = tx_q->dma_tx + entry;
 
                des = skb_frag_dma_map(priv->device, frag, 0, len,
                                       DMA_TO_DEVICE);
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err; /* should reuse desc w/o issues */
 
-               priv->tx_skbuff[entry] = NULL;
+               tx_q->tx_skbuff[entry] = NULL;
 
-               priv->tx_skbuff_dma[entry].buf = des;
+               tx_q->tx_skbuff_dma[entry].buf = des;
                if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
                        desc->des0 = cpu_to_le32(des);
                else
                        desc->des2 = cpu_to_le32(des);
 
-               priv->tx_skbuff_dma[entry].map_as_page = true;
-               priv->tx_skbuff_dma[entry].len = len;
-               priv->tx_skbuff_dma[entry].last_segment = last_segment;
+               tx_q->tx_skbuff_dma[entry].map_as_page = true;
+               tx_q->tx_skbuff_dma[entry].len = len;
+               tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
 
                /* Prepare the descriptor and set the own bit too */
                priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
@@ -2252,20 +3038,20 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
-       priv->cur_tx = entry;
+       tx_q->cur_tx = entry;
 
        if (netif_msg_pktdata(priv)) {
                void *tx_head;
 
                netdev_dbg(priv->dev,
                           "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
-                          __func__, priv->cur_tx, priv->dirty_tx, first_entry,
+                          __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
                           entry, first, nfrags);
 
                if (priv->extend_desc)
-                       tx_head = (void *)priv->dma_etx;
+                       tx_head = (void *)tx_q->dma_etx;
                else
-                       tx_head = (void *)priv->dma_tx;
+                       tx_head = (void *)tx_q->dma_tx;
 
                priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
 
@@ -2273,10 +3059,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                print_pkt(skb->data, skb->len);
        }
 
-       if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
+       if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
                netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
                          __func__);
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        dev->stats.tx_bytes += skb->len;
@@ -2311,14 +3097,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err;
 
-               priv->tx_skbuff_dma[first_entry].buf = des;
+               tx_q->tx_skbuff_dma[first_entry].buf = des;
                if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
                        first->des0 = cpu_to_le32(des);
                else
                        first->des2 = cpu_to_le32(des);
 
-               priv->tx_skbuff_dma[first_entry].len = nopaged_len;
-               priv->tx_skbuff_dma[first_entry].last_segment = last_segment;
+               tx_q->tx_skbuff_dma[first_entry].len = nopaged_len;
+               tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment;
 
                if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
                             priv->hwts_tx_en)) {
@@ -2339,13 +3125,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                dma_wmb();
        }
 
-       netdev_sent_queue(dev, skb->len);
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
        if (priv->synopsys_id < DWMAC_CORE_4_00)
                priv->hw->dma->enable_dma_transmission(priv->ioaddr);
        else
-               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                              STMMAC_CHAN0);
+               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
+                                              queue);
 
        return NETDEV_TX_OK;
 
@@ -2373,9 +3159,9 @@ static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb)
 }
 
 
-static inline int stmmac_rx_threshold_count(struct stmmac_priv *priv)
+static inline int stmmac_rx_threshold_count(struct stmmac_rx_queue *rx_q)
 {
-       if (priv->rx_zeroc_thresh < STMMAC_RX_THRESH)
+       if (rx_q->rx_zeroc_thresh < STMMAC_RX_THRESH)
                return 0;
 
        return 1;
@@ -2384,30 +3170,33 @@ static inline int stmmac_rx_threshold_count(struct stmmac_priv *priv)
 /**
  * stmmac_rx_refill - refill used skb preallocated buffers
  * @priv: driver private structure
+ * @queue: RX queue index
  * Description : this is to reallocate the skb for the reception process
  * that is based on zero-copy.
  */
-static inline void stmmac_rx_refill(struct stmmac_priv *priv)
+static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int dirty = stmmac_rx_dirty(priv, queue);
+       unsigned int entry = rx_q->dirty_rx;
+
        int bfsize = priv->dma_buf_sz;
-       unsigned int entry = priv->dirty_rx;
-       int dirty = stmmac_rx_dirty(priv);
 
        while (dirty-- > 0) {
                struct dma_desc *p;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_erx + entry);
+                       p = (struct dma_desc *)(rx_q->dma_erx + entry);
                else
-                       p = priv->dma_rx + entry;
+                       p = rx_q->dma_rx + entry;
 
-               if (likely(priv->rx_skbuff[entry] == NULL)) {
+               if (likely(!rx_q->rx_skbuff[entry])) {
                        struct sk_buff *skb;
 
                        skb = netdev_alloc_skb_ip_align(priv->dev, bfsize);
                        if (unlikely(!skb)) {
                                /* so for a while no zero-copy! */
-                               priv->rx_zeroc_thresh = STMMAC_RX_THRESH;
+                               rx_q->rx_zeroc_thresh = STMMAC_RX_THRESH;
                                if (unlikely(net_ratelimit()))
                                        dev_err(priv->device,
                                                "fail to alloc skb entry %d\n",
@@ -2415,28 +3204,28 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
                                break;
                        }
 
-                       priv->rx_skbuff[entry] = skb;
-                       priv->rx_skbuff_dma[entry] =
+                       rx_q->rx_skbuff[entry] = skb;
+                       rx_q->rx_skbuff_dma[entry] =
                            dma_map_single(priv->device, skb->data, bfsize,
                                           DMA_FROM_DEVICE);
                        if (dma_mapping_error(priv->device,
-                                             priv->rx_skbuff_dma[entry])) {
+                                             rx_q->rx_skbuff_dma[entry])) {
                                netdev_err(priv->dev, "Rx DMA map failed\n");
                                dev_kfree_skb(skb);
                                break;
                        }
 
                        if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
+                               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
                                p->des1 = 0;
                        } else {
-                               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
+                               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
                        }
                        if (priv->hw->mode->refill_desc3)
-                               priv->hw->mode->refill_desc3(priv, p);
+                               priv->hw->mode->refill_desc3(rx_q, p);
 
-                       if (priv->rx_zeroc_thresh > 0)
-                               priv->rx_zeroc_thresh--;
+                       if (rx_q->rx_zeroc_thresh > 0)
+                               rx_q->rx_zeroc_thresh--;
 
                        netif_dbg(priv, rx_status, priv->dev,
                                  "refill entry #%d\n", entry);
@@ -2452,31 +3241,33 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 
                entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
        }
-       priv->dirty_rx = entry;
+       rx_q->dirty_rx = entry;
 }
 
 /**
  * stmmac_rx - manage the receive process
  * @priv: driver private structure
- * @limit: napi bugget.
+ * @limit: napi budget
+ * @queue: RX queue index.
  * Description :  this the function called by the napi poll method.
  * It gets all the frames inside the ring.
  */
-static int stmmac_rx(struct stmmac_priv *priv, int limit)
+static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 {
-       unsigned int entry = priv->cur_rx;
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       unsigned int entry = rx_q->cur_rx;
+       int coe = priv->hw->rx_csum;
        unsigned int next_entry;
        unsigned int count = 0;
-       int coe = priv->hw->rx_csum;
 
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
 
                netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
                if (priv->extend_desc)
-                       rx_head = (void *)priv->dma_erx;
+                       rx_head = (void *)rx_q->dma_erx;
                else
-                       rx_head = (void *)priv->dma_rx;
+                       rx_head = (void *)rx_q->dma_rx;
 
                priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true);
        }
@@ -2486,9 +3277,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                struct dma_desc *np;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_erx + entry);
+                       p = (struct dma_desc *)(rx_q->dma_erx + entry);
                else
-                       p = priv->dma_rx + entry;
+                       p = rx_q->dma_rx + entry;
 
                /* read the status of the incoming frame */
                status = priv->hw->desc->rx_status(&priv->dev->stats,
@@ -2499,20 +3290,20 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 
                count++;
 
-               priv->cur_rx = STMMAC_GET_ENTRY(priv->cur_rx, DMA_RX_SIZE);
-               next_entry = priv->cur_rx;
+               rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE);
+               next_entry = rx_q->cur_rx;
 
                if (priv->extend_desc)
-                       np = (struct dma_desc *)(priv->dma_erx + next_entry);
+                       np = (struct dma_desc *)(rx_q->dma_erx + next_entry);
                else
-                       np = priv->dma_rx + next_entry;
+                       np = rx_q->dma_rx + next_entry;
 
                prefetch(np);
 
                if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
                        priv->hw->desc->rx_extended_status(&priv->dev->stats,
                                                           &priv->xstats,
-                                                          priv->dma_erx +
+                                                          rx_q->dma_erx +
                                                           entry);
                if (unlikely(status == discard_frame)) {
                        priv->dev->stats.rx_errors++;
@@ -2522,9 +3313,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                                 * them in stmmac_rx_refill() function so that
                                 * device can reuse it.
                                 */
-                               priv->rx_skbuff[entry] = NULL;
+                               rx_q->rx_skbuff[entry] = NULL;
                                dma_unmap_single(priv->device,
-                                                priv->rx_skbuff_dma[entry],
+                                                rx_q->rx_skbuff_dma[entry],
                                                 priv->dma_buf_sz,
                                                 DMA_FROM_DEVICE);
                        }
@@ -2572,7 +3363,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                         */
                        if (unlikely(!priv->plat->has_gmac4 &&
                                     ((frame_len < priv->rx_copybreak) ||
-                                    stmmac_rx_threshold_count(priv)))) {
+                                    stmmac_rx_threshold_count(rx_q)))) {
                                skb = netdev_alloc_skb_ip_align(priv->dev,
                                                                frame_len);
                                if (unlikely(!skb)) {
@@ -2584,21 +3375,21 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                                }
 
                                dma_sync_single_for_cpu(priv->device,
-                                                       priv->rx_skbuff_dma
+                                                       rx_q->rx_skbuff_dma
                                                        [entry], frame_len,
                                                        DMA_FROM_DEVICE);
                                skb_copy_to_linear_data(skb,
-                                                       priv->
+                                                       rx_q->
                                                        rx_skbuff[entry]->data,
                                                        frame_len);
 
                                skb_put(skb, frame_len);
                                dma_sync_single_for_device(priv->device,
-                                                          priv->rx_skbuff_dma
+                                                          rx_q->rx_skbuff_dma
                                                           [entry], frame_len,
                                                           DMA_FROM_DEVICE);
                        } else {
-                               skb = priv->rx_skbuff[entry];
+                               skb = rx_q->rx_skbuff[entry];
                                if (unlikely(!skb)) {
                                        netdev_err(priv->dev,
                                                   "%s: Inconsistent Rx chain\n",
@@ -2607,12 +3398,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                                        break;
                                }
                                prefetch(skb->data - NET_IP_ALIGN);
-                               priv->rx_skbuff[entry] = NULL;
-                               priv->rx_zeroc_thresh++;
+                               rx_q->rx_skbuff[entry] = NULL;
+                               rx_q->rx_zeroc_thresh++;
 
                                skb_put(skb, frame_len);
                                dma_unmap_single(priv->device,
-                                                priv->rx_skbuff_dma[entry],
+                                                rx_q->rx_skbuff_dma[entry],
                                                 priv->dma_buf_sz,
                                                 DMA_FROM_DEVICE);
                        }
@@ -2634,7 +3425,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                        else
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-                       napi_gro_receive(&priv->napi, skb);
+                       napi_gro_receive(&rx_q->napi, skb);
 
                        priv->dev->stats.rx_packets++;
                        priv->dev->stats.rx_bytes += frame_len;
@@ -2642,7 +3433,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                entry = next_entry;
        }
 
-       stmmac_rx_refill(priv);
+       stmmac_rx_refill(priv, queue);
 
        priv->xstats.rx_pkt_n += count;
 
@@ -2659,16 +3450,24 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
  */
 static int stmmac_poll(struct napi_struct *napi, int budget)
 {
-       struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
+       struct stmmac_rx_queue *rx_q =
+               container_of(napi, struct stmmac_rx_queue, napi);
+       struct stmmac_priv *priv = rx_q->priv_data;
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 chan = rx_q->queue_index;   /* same index that is handed to stmmac_rx() below */
        int work_done = 0;
+       u32 queue;
 
        priv->xstats.napi_poll++;
-       stmmac_tx_clean(priv);
 
-       work_done = stmmac_rx(priv, budget);
+       /* Run stmmac_tx_clean() on every TX queue in use, regardless of
+        * which RX queue this NAPI instance belongs to.
+        * NOTE(review): each RX queue has its own NAPI instance and each
+        * of them walks all TX queues here - confirm stmmac_tx_clean()
+        * tolerates being called for the same TX queue from several
+        * NAPI contexts.
+        */
+       for (queue = 0; queue < tx_count; queue++)
+               stmmac_tx_clean(priv, queue);
+
+       work_done = stmmac_rx(priv, budget, rx_q->queue_index);
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
-               stmmac_enable_dma_irq(priv);
+               stmmac_enable_dma_irq(priv, chan);
        }
        return work_done;
 }
@@ -2684,9 +3483,12 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 static void stmmac_tx_timeout(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 chan;       /* TX queue index passed to stmmac_tx_err() */
 
        /* Clear Tx resources and restart transmitting again */
-       stmmac_tx_err(priv);
+       for (chan = 0; chan < tx_count; chan++)
+               stmmac_tx_err(priv, chan);
 }
 
 /**
@@ -2795,6 +3597,12 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 {
        struct net_device *dev = (struct net_device *)dev_id;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queues_count;
+       u32 queue;
+
+       queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt;
 
        if (priv->irq_wake)
                pm_wakeup_event(priv->device, 0);
@@ -2808,16 +3616,30 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
                int status = priv->hw->mac->host_irq_status(priv->hw,
                                                            &priv->xstats);
+
                if (unlikely(status)) {
                        /* For LPI we need to save the tx status */
                        if (status & CORE_IRQ_TX_PATH_IN_LPI_MODE)
                                priv->tx_path_in_lpi_mode = true;
                        if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
                                priv->tx_path_in_lpi_mode = false;
-                       if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
-                               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
-                                                       priv->rx_tail_addr,
-                                                       STMMAC_CHAN0);
+               }
+
+               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+                       for (queue = 0; queue < queues_count; queue++) {
+                               struct stmmac_rx_queue *rx_q =
+                               &priv->rx_queue[queue];
+
+                               status |=
+                               priv->hw->mac->host_mtl_irq_status(priv->hw,
+                                                                  queue);
+
+                               if (status & CORE_IRQ_MTL_RX_OVERFLOW &&
+                                   priv->hw->dma->set_rx_tail_ptr)
+                                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                               rx_q->rx_tail_addr,
+                                                               queue);
+                       }
                }
 
                /* PCS link status */
@@ -2915,17 +3737,40 @@ static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v)
 {
        struct net_device *dev = seq->private;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue;
 
-       if (priv->extend_desc) {
-               seq_printf(seq, "Extended RX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_erx, DMA_RX_SIZE, 1, seq);
-               seq_printf(seq, "Extended TX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_etx, DMA_TX_SIZE, 1, seq);
-       } else {
-               seq_printf(seq, "RX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_rx, DMA_RX_SIZE, 0, seq);
-               seq_printf(seq, "TX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_tx, DMA_TX_SIZE, 0, seq);
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               seq_printf(seq, "RX Queue %d:\n", queue);
+
+               if (priv->extend_desc) {
+                       seq_printf(seq, "Extended descriptor ring:\n");
+                       sysfs_display_ring((void *)rx_q->dma_erx,
+                                          DMA_RX_SIZE, 1, seq);
+               } else {
+                       seq_printf(seq, "Descriptor ring:\n");
+                       sysfs_display_ring((void *)rx_q->dma_rx,
+                                          DMA_RX_SIZE, 0, seq);
+               }
+       }
+
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               seq_printf(seq, "TX Queue %d:\n", queue);
+
+               if (priv->extend_desc) {
+                       seq_printf(seq, "Extended descriptor ring:\n");
+                       sysfs_display_ring((void *)tx_q->dma_etx,
+                                          DMA_TX_SIZE, 1, seq);
+               } else {
+                       seq_printf(seq, "Descriptor ring:\n");
+                       sysfs_display_ring((void *)tx_q->dma_tx,
+                                          DMA_TX_SIZE, 0, seq);
+               }
        }
 
        return 0;
@@ -3208,11 +4053,14 @@ int stmmac_dvr_probe(struct device *device,
                     struct plat_stmmacenet_data *plat_dat,
                     struct stmmac_resources *res)
 {
-       int ret = 0;
        struct net_device *ndev = NULL;
        struct stmmac_priv *priv;
+       int ret = 0;
+       u32 queue;
 
-       ndev = alloc_etherdev(sizeof(struct stmmac_priv));
+       ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
+                                 MTL_MAX_TX_QUEUES,
+                                 MTL_MAX_RX_QUEUES);
        if (!ndev)
                return -ENOMEM;
 
@@ -3254,6 +4102,10 @@ int stmmac_dvr_probe(struct device *device,
        if (ret)
                goto error_hw_init;
 
+       /* Configure real RX and TX queues */
+       ndev->real_num_rx_queues = priv->plat->rx_queues_to_use;
+       ndev->real_num_tx_queues = priv->plat->tx_queues_to_use;
+
        ndev->netdev_ops = &stmmac_netdev_ops;
 
        ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -3303,7 +4155,12 @@ int stmmac_dvr_probe(struct device *device,
                         "Enable RX Mitigation via HW Watchdog Timer\n");
        }
 
-       netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+       for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
+                              (8 * priv->plat->rx_queues_to_use));
+       }
 
        spin_lock_init(&priv->lock);
 
@@ -3348,7 +4205,11 @@ error_netdev_register:
            priv->hw->pcs != STMMAC_PCS_RTBI)
                stmmac_mdio_unregister(ndev);
 error_mdio_register:
-       netif_napi_del(&priv->napi);
+       for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               netif_napi_del(&rx_q->napi);
+       }
 error_hw_init:
        free_netdev(ndev);
 
@@ -3369,10 +4230,9 @@ int stmmac_dvr_remove(struct device *dev)
 
        netdev_info(priv->dev, "%s: removing driver", __func__);
 
-       priv->hw->dma->stop_rx(priv->ioaddr);
-       priv->hw->dma->stop_tx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
        netif_carrier_off(ndev);
        unregister_netdev(ndev);
        if (priv->plat->stmmac_rst)
@@ -3411,20 +4271,19 @@ int stmmac_suspend(struct device *dev)
        spin_lock_irqsave(&priv->lock, flags);
 
        netif_device_detach(ndev);
-       netif_stop_queue(ndev);
+       stmmac_stop_all_queues(priv);
 
-       napi_disable(&priv->napi);
+       stmmac_disable_all_queues(priv);
 
        /* Stop TX/RX DMA */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Enable Power down mode by programming the PMT regs */
        if (device_may_wakeup(priv->device)) {
                priv->hw->mac->pmt(priv->hw, priv->wolopts);
                priv->irq_wake = 1;
        } else {
-               stmmac_set_mac(priv->ioaddr, false);
+               priv->hw->mac->set_mac(priv->ioaddr, false);
                pinctrl_pm_select_sleep_state(priv->device);
                /* Disable clock in case of PWM is off */
                clk_disable(priv->plat->pclk);
@@ -3439,6 +4298,31 @@ int stmmac_suspend(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(stmmac_suspend);
 
+/**
+ * stmmac_reset_queues_param - reset queue parameters
+ * @priv: driver private structure
+ * Description: zeroes the cur_rx/dirty_rx indexes of every RX queue in use
+ * and the cur_tx/dirty_tx indexes of every TX queue in use.
+ */
+static void stmmac_reset_queues_param(struct stmmac_priv *priv)
+{
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               rx_q->cur_rx = 0;
+               rx_q->dirty_rx = 0;
+       }
+
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               tx_q->cur_tx = 0;
+               tx_q->dirty_tx = 0;
+       }
+}
+
 /**
  * stmmac_resume - resume callback
  * @dev: device pointer
@@ -3479,10 +4363,8 @@ int stmmac_resume(struct device *dev)
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       priv->cur_rx = 0;
-       priv->dirty_rx = 0;
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
+       stmmac_reset_queues_param(priv);
+
        /* reset private mss value to force mss context settings at
         * next tso xmit (only used for gmac4).
         */
@@ -3494,9 +4376,9 @@ int stmmac_resume(struct device *dev)
        stmmac_init_tx_coalesce(priv);
        stmmac_set_rx_mode(ndev);
 
-       napi_enable(&priv->napi);
+       stmmac_enable_all_queues(priv);
 
-       netif_start_queue(ndev);
+       stmmac_start_all_queues(priv);
 
        spin_unlock_irqrestore(&priv->lock, flags);