Merge branch 'ena-fixes'

author David S. Miller <davem@davemloft.net>

Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)

committer David S. Miller <davem@davemloft.net>

Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)
author David S. Miller <davem@davemloft.net>
Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)
committer David S. Miller <davem@davemloft.net>
Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c

index 08d11cede9c972596ee683c5d255fe143b76b9b8..f5b237e0bd60e2f0e2e6fd5a95d78515285629b1 100644 (file)
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -61,6 +61,8 @@
  
  #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
  
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
  /*****************************************************************************/
  /*****************************************************************************/
  /*****************************************************************************/
@@ -232,11 +234,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
         tail_masked = admin_queue->sq.tail & queue_size_mask;
  
         /* In case of queue FULL */
-       cnt = admin_queue->sq.tail - admin_queue->sq.head;
+       cnt = atomic_read(&admin_queue->outstanding_cmds);
         if (cnt >= admin_queue->q_depth) {
-               pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
-                        admin_queue->sq.tail, admin_queue->sq.head,
-                        admin_queue->q_depth);
+               pr_debug("admin queue is full.\n");
                 admin_queue->stats.out_of_space++;
                 return ERR_PTR(-ENOSPC);
         }
@@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
  static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
                                                      struct ena_com_admin_queue *admin_queue)
  {
-       unsigned long flags;
-       u32 start_time;
+       unsigned long flags, timeout;
         int ret;
  
-       start_time = ((u32)jiffies_to_usecs(jiffies));
+       timeout = jiffies + ADMIN_CMD_TIMEOUT_US;
+
+       while (1) {
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+               ena_com_handle_admin_completion(admin_queue);
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+               if (comp_ctx->status != ENA_CMD_SUBMITTED)
+                       break;
  
-       while (comp_ctx->status == ENA_CMD_SUBMITTED) {
-               if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
-                   ADMIN_CMD_TIMEOUT_US) {
+               if (time_is_before_jiffies(timeout)) {
                         pr_err("Wait for completion (polling) timeout\n");
                         /* ENA didn't have any completion */
                         spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
                         goto err;
                 }
  
-               spin_lock_irqsave(&admin_queue->q_lock, flags);
-               ena_com_handle_admin_completion(admin_queue);
-               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-
                 msleep(100);
         }
  
@@ -1455,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
  
  void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
  {
+       u32 mask_value = 0;
+
+       if (polling)
+               mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+       writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
         ena_dev->admin_queue.polling = polling;
  }
  
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c

index 67b2338f8fb34100df983fc11727d4e661548b24..3ee55e2fd69465e12603890bce1b530be551a2d9 100644 (file)
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
         ENA_STAT_TX_ENTRY(tx_poll),
         ENA_STAT_TX_ENTRY(doorbells),
         ENA_STAT_TX_ENTRY(prepare_ctx_err),
-       ENA_STAT_TX_ENTRY(missing_tx_comp),
         ENA_STAT_TX_ENTRY(bad_req_id),
  };
  
@@ -94,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
         ENA_STAT_RX_ENTRY(dma_mapping_err),
         ENA_STAT_RX_ENTRY(bad_desc_num),
         ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+       ENA_STAT_RX_ENTRY(empty_rx_ring),
  };
  
  static const struct ena_stats ena_stats_ena_com_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c

index 7c1214d7885566ded4dfc05e85c2ee86b8d3c949..4f16ed38bcf3a267f84894177da1f89713a0eb3e 100644 (file)
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
                 rxr->sgl_size = adapter->max_rx_sgl_size;
                 rxr->smoothed_interval =
                         ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+               rxr->empty_rx_queue = 0;
         }
  }
  
@@ -1078,6 +1079,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
         rx_ring->per_napi_bytes = 0;
  }
  
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+                                       struct ena_ring *rx_ring)
+{
+       struct ena_eth_io_intr_reg intr_reg;
+
+       /* Update intr register: rx intr delay,
+        * tx intr delay and interrupt unmask
+        */
+       ena_com_update_intr_reg(&intr_reg,
+                               rx_ring->smoothed_interval,
+                               tx_ring->smoothed_interval,
+                               true);
+
+       /* It is a shared MSI-X.
+        * Tx and Rx CQ have pointer to it.
+        * So we use one of them to reach the intr reg
+        */
+       ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
  static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
                                              struct ena_ring *rx_ring)
  {
@@ -1108,7 +1129,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
  {
         struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
         struct ena_ring *tx_ring, *rx_ring;
-       struct ena_eth_io_intr_reg intr_reg;
  
         u32 tx_work_done;
         u32 rx_work_done;
@@ -1149,22 +1169,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
                         if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
                                 ena_adjust_intr_moderation(rx_ring, tx_ring);
  
-                       /* Update intr register: rx intr delay,
-                        * tx intr delay and interrupt unmask
-                        */
-                       ena_com_update_intr_reg(&intr_reg,
-                                               rx_ring->smoothed_interval,
-                                               tx_ring->smoothed_interval,
-                                               true);
-
-                       /* It is a shared MSI-X.
-                        * Tx and Rx CQ have pointer to it.
-                        * So we use one of them to reach the intr reg
-                        */
-                       ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+                       ena_unmask_interrupt(tx_ring, rx_ring);
                 }
  
-
                 ena_update_ring_numa_node(tx_ring, rx_ring);
  
                 ret = rx_work_done;
@@ -1485,6 +1492,11 @@ static int ena_up_complete(struct ena_adapter *adapter)
  
         ena_napi_enable_all(adapter);
  
+       /* Enable completion queues interrupt */
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_unmask_interrupt(&adapter->tx_ring[i],
+                                    &adapter->rx_ring[i]);
+
         /* schedule napi in case we had pending packets
          * from the last time we disable napi
          */
@@ -1532,6 +1544,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
                           "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
                           qid, rc);
                 ena_com_destroy_io_queue(ena_dev, ena_qid);
+               return rc;
         }
  
         ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1596,6 +1609,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
                           "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
                           qid, rc);
                 ena_com_destroy_io_queue(ena_dev, ena_qid);
+               return rc;
         }
  
         ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1981,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
  
         tx_info->tx_descs = nb_hw_desc;
         tx_info->last_jiffies = jiffies;
+       tx_info->print_once = 0;
  
         tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
                 tx_ring->ring_size);
@@ -2550,13 +2565,44 @@ err:
                 "Reset attempt failed. Can not reset the device\n");
  }
  
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static int check_missing_comp_in_queue(struct ena_adapter *adapter,
+                                      struct ena_ring *tx_ring)
  {
         struct ena_tx_buffer *tx_buf;
         unsigned long last_jiffies;
+       u32 missed_tx = 0;
+       int i;
+
+       for (i = 0; i < tx_ring->ring_size; i++) {
+               tx_buf = &tx_ring->tx_buffer_info[i];
+               last_jiffies = tx_buf->last_jiffies;
+               if (unlikely(last_jiffies &&
+                            time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+                       if (!tx_buf->print_once)
+                               netif_notice(adapter, tx_err, adapter->netdev,
+                                            "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+                                            tx_ring->qid, i);
+
+                       tx_buf->print_once = 1;
+                       missed_tx++;
+
+                       if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+                               netif_err(adapter, tx_err, adapter->netdev,
+                                         "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+                                         missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+                               set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+                               return -EIO;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
         struct ena_ring *tx_ring;
-       int i, j, budget;
-       u32 missed_tx;
+       int i, budget, rc;
  
         /* Make sure the driver doesn't turn the device in other process */
         smp_rmb();
@@ -2572,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
         for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
                 tx_ring = &adapter->tx_ring[i];
  
-               for (j = 0; j < tx_ring->ring_size; j++) {
-                       tx_buf = &tx_ring->tx_buffer_info[j];
-                       last_jiffies = tx_buf->last_jiffies;
-                       if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
-                               netif_notice(adapter, tx_err, adapter->netdev,
-                                            "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
-                                            tx_ring->qid, j);
-
-                               u64_stats_update_begin(&tx_ring->syncp);
-                               missed_tx = tx_ring->tx_stats.missing_tx_comp++;
-                               u64_stats_update_end(&tx_ring->syncp);
-
-                               /* Clear last jiffies so the lost buffer won't
-                                * be counted twice.
-                                */
-                               tx_buf->last_jiffies = 0;
-
-                               if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
-                                       netif_err(adapter, tx_err, adapter->netdev,
-                                                 "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
-                                                 missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
-                                       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-                               }
-                       }
-               }
+               rc = check_missing_comp_in_queue(adapter, tx_ring);
+               if (unlikely(rc))
+                       return;
  
                 budget--;
                 if (!budget)
@@ -2606,6 +2630,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
         adapter->last_monitored_tx_qid = i % adapter->num_queues;
  }
  
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate the new Rx descriptors that would let the
+ * device send new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB (with a minimum of 128MB for
+ * constrained environments).
+ *
+ * When such a situation is detected - Reschedule napi
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+       struct ena_ring *rx_ring;
+       int i, refill_required;
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               return;
+
+       if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+               return;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rx_ring = &adapter->rx_ring[i];
+
+               refill_required =
+                       ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+               if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+                       rx_ring->empty_rx_queue++;
+
+                       if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+                               u64_stats_update_begin(&rx_ring->syncp);
+                               rx_ring->rx_stats.empty_rx_ring++;
+                               u64_stats_update_end(&rx_ring->syncp);
+
+                               netif_err(adapter, drv, adapter->netdev,
+                                         "trigger refill for ring %d\n", i);
+
+                               napi_schedule(rx_ring->napi);
+                               rx_ring->empty_rx_queue = 0;
+                       }
+               } else {
+                       rx_ring->empty_rx_queue = 0;
+               }
+       }
+}
+
  /* Check for keep alive expiration */
  static void check_for_missing_keep_alive(struct ena_adapter *adapter)
  {
@@ -2660,6 +2736,8 @@ static void ena_timer_service(unsigned long data)
  
         check_for_missing_tx_completions(adapter);
  
+       check_for_empty_rx_ring(adapter);
+
         if (debug_area)
                 ena_dump_stats_to_buf(adapter, debug_area);
  
@@ -2840,6 +2918,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
  {
         int release_bars;
  
+       if (ena_dev->mem_bar)
+               devm_iounmap(&pdev->dev, ena_dev->mem_bar);
+
+       devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+
         release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
         pci_release_selected_regions(pdev, release_bars);
  }
@@ -2927,8 +3010,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 goto err_free_ena_dev;
         }
  
-       ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
-                                  pci_resource_len(pdev, ENA_REG_BAR));
+       ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+                                       pci_resource_start(pdev, ENA_REG_BAR),
+                                       pci_resource_len(pdev, ENA_REG_BAR));
         if (!ena_dev->reg_bar) {
                 dev_err(&pdev->dev, "failed to remap regs bar\n");
                 rc = -EFAULT;
@@ -2948,8 +3032,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
  
         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-               ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
-                                             pci_resource_len(pdev, ENA_MEM_BAR));
+               ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+                                                  pci_resource_start(pdev, ENA_MEM_BAR),
+                                                  pci_resource_len(pdev, ENA_MEM_BAR));
                 if (!ena_dev->mem_bar) {
                         rc = -EFAULT;
                         goto err_device_destroy;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h

index 0e22bce6239d0e06c73a366e0d98a2348a9b7fa9..a4d3d5e2106885b093424dc2ae3856c002653d06 100644 (file)
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -45,7 +45,7 @@
  
  #define DRV_MODULE_VER_MAJOR   1
  #define DRV_MODULE_VER_MINOR   1
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 7
  
  #define DRV_MODULE_NAME                "ena"
  #ifndef DRV_MODULE_VERSION
@@ -146,7 +146,18 @@ struct ena_tx_buffer {
         u32 tx_descs;
         /* num of buffers used by this skb */
         u32 num_of_bufs;
-       /* Save the last jiffies to detect missing tx packets */
+
+       /* Limits the number of prints when detecting missing tx packets */
+       u32 print_once;
+       /* Save the last jiffies to detect missing tx packets
+        *
+        * Set to a non-zero value in ena_start_xmit and set to zero in
+        * napi and the timer service routine.
+        *
+        * while this value is not protected by lock,
+        * a given packet is not expected to be handled by ena_start_xmit
+        * and by napi/timer_service at the same time.
+        */
         unsigned long last_jiffies;
         struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
  } ____cacheline_aligned;
@@ -170,7 +181,6 @@ struct ena_stats_tx {
         u64 napi_comp;
         u64 tx_poll;
         u64 doorbells;
-       u64 missing_tx_comp;
         u64 bad_req_id;
  };
  
@@ -184,6 +194,7 @@ struct ena_stats_rx {
         u64 dma_mapping_err;
         u64 bad_desc_num;
         u64 rx_copybreak_pkt;
+       u64 empty_rx_ring;
  };
  
  struct ena_ring {
@@ -231,6 +242,7 @@ struct ena_ring {
                 struct ena_stats_tx tx_stats;
                 struct ena_stats_rx rx_stats;
         };
+       int empty_rx_queue;
  } ____cacheline_aligned;
  
  struct ena_stats_dev {
author	David S. Miller <davem@davemloft.net>
	Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)
committer	David S. Miller <davem@davemloft.net>
	Sun, 11 Jun 2017 20:36:48 +0000 (16:36 -0400)
drivers/net/ethernet/amazon/ena/ena_com.c		patch \| blob \| history
drivers/net/ethernet/amazon/ena/ena_ethtool.c		patch \| blob \| history
drivers/net/ethernet/amazon/ena/ena_netdev.c		patch \| blob \| history
drivers/net/ethernet/amazon/ena/ena_netdev.h		patch \| blob \| history