struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
RING_IDX rx_last_skb_slots;
+ bool rx_queue_purge;
+
+ struct timer_list wake_queue;
/* This array is allocated seperately as it is large */
struct gnttab_copy *grant_copy_op;
extern bool separate_tx_rx_irq;
+extern unsigned int rx_drain_timeout_msecs;
+extern unsigned int rx_drain_timeout_jiffies;
+
#endif /* __XEN_NETBACK__COMMON_H__ */
return IRQ_HANDLED;
}
+static void xenvif_wake_queue(unsigned long data)
+{
+ struct xenvif *vif = (struct xenvif *)data;
+
+ if (netif_queue_stopped(vif->dev)) {
+ netdev_err(vif->dev, "draining TX queue\n");
+ vif->rx_queue_purge = true;
+ xenvif_kick_thread(vif);
+ netif_wake_queue(vif->dev);
+ }
+}
+
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
* then turn off the queue to give the ring a chance to
* drain.
*/
- if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+ if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+ vif->wake_queue.function = xenvif_wake_queue;
+ vif->wake_queue.data = (unsigned long)vif;
xenvif_stop_queue(vif);
+ mod_timer(&vif->wake_queue,
+ jiffies + rx_drain_timeout_jiffies);
+ }
skb_queue_tail(&vif->rx_queue, skb);
xenvif_kick_thread(vif);
init_timer(&vif->credit_timeout);
vif->credit_window_start = get_jiffies_64();
+ init_timer(&vif->wake_queue);
+
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
xenvif_carrier_off(vif);
if (vif->task) {
+ del_timer_sync(&vif->wake_queue);
kthread_stop(vif->task);
vif->task = NULL;
}
void xenvif_free(struct xenvif *vif)
{
int i, unmap_timeout = 0;
+ /* Here we want to avoid timeout messages if an skb can be legitimatly
+ * stucked somewhere else. Realisticly this could be an another vif's
+ * internal or QDisc queue. That another vif also has this
+ * rx_drain_timeout_msecs timeout, but the timer only ditches the
+ * internal queue. After that, the QDisc queue can put in worst case
+ * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
+ * internal queue, so we need several rounds of such timeouts until we
+ * can be sure that no another vif should have skb's from us. We are
+ * not sending more skb's, so newly stucked packets are not interesting
+ * for us here.
+ */
+ unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+ DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
for (i = 0; i < MAX_PENDING_REQS; ++i) {
if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
unmap_timeout++;
schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > 9 &&
+ if (unmap_timeout > worst_case_skb_lifetime &&
net_ratelimit())
netdev_err(vif->dev,
"Page still granted! Index: %x\n",
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
+/* When guest ring is filled up, qdisc queues the packets for us, but we have
+ * to timeout them, otherwise other guests' packets can get stucked there
+ */
+unsigned int rx_drain_timeout_msecs = 10000;
+module_param(rx_drain_timeout_msecs, uint, 0444);
+unsigned int rx_drain_timeout_jiffies;
+
/*
* This is the maximum slots a skb can have. If a guest sends a skb
* which exceeds this limit it is considered malicious.
static inline int rx_work_todo(struct xenvif *vif)
{
- return !skb_queue_empty(&vif->rx_queue) &&
- xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots);
+ return (!skb_queue_empty(&vif->rx_queue) &&
+ xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
+ vif->rx_queue_purge;
}
static inline int tx_work_todo(struct xenvif *vif)
if (kthread_should_stop())
break;
+ if (vif->rx_queue_purge) {
+ skb_queue_purge(&vif->rx_queue);
+ vif->rx_queue_purge = false;
+ }
+
if (!skb_queue_empty(&vif->rx_queue))
xenvif_rx_action(vif);
if (skb_queue_empty(&vif->rx_queue) &&
- netif_queue_stopped(vif->dev))
+ netif_queue_stopped(vif->dev)) {
+ del_timer_sync(&vif->wake_queue);
xenvif_start_queue(vif);
+ }
cond_resched();
}
if (rc)
goto failed_init;
+ rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+
return 0;
failed_init: