From 6c4e548ff36672eeb78f8288a2920d66fa4a6a66 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 16 May 2014 21:48:22 +0200 Subject: [PATCH] net: cdc_ncm: use ethtool to tune coalescing settings MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Datagram coalescing is an integral part of the NCM and MBIM protocols, intended to reduce the interrupt load primarily on the device end of the USB link. As with all coalescing solutions, there is a trade-off between buffering and interrupts. The current defaults are based on the assumption that device side buffers should be the limiting factor. However, many modern high speed LTE modems suffer from buffer-bloat, making this assumption fail. This results in sub-optimal performance due to excessive coalescing. And in cases where such modems are connected to cheap embedded hosts there are often severe buffer allocation issues, giving very noticeable performance degradation. A start on improving this is going from build time hard coded limits to per device user configurable limits. The ethtool coalescing API was selected as user interface because, although the tuned values are buffer sizes, these settings directly control datagram coalescing. Signed-off-by: Bjørn Mork Signed-off-by: David S.
Miller --- Note: timer_interval is held in 32 bits and CDC_NCM_TIMER_INTERVAL_MAX is bounded by U32_MAX nanoseconds, so converting between usecs and nsecs needs no 64-bit division and cannot wrap on 32-bit hosts. drivers/net/usb/cdc_ncm.c | 79 +++++++++++++++++++++++++++++++++++-- include/linux/usb/cdc_ncm.h | 6 +++- 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2ec3790a4db8..141dbec912be 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -65,6 +65,75 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; +/* ethtool get_coalesce: report rx_max/tx_max as frame counts and the + * tx timer interval in microseconds. Always returns 0. + */ +static int cdc_ncm_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + + /* assuming maximum sized dgrams and ignoring NDPs */ + ec->rx_max_coalesced_frames = ctx->rx_max / ctx->max_datagram_size; + ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size; + + /* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */ + ec->tx_coalesce_usecs = ctx->timer_interval / (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); + return 0; +} + +static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx); + +/* ethtool set_coalesce: a zero frame count keeps the current rx_max/tx_max; + * tx_coalesce_usecs == 0 zeroes timer_interval so tx stops waiting for frames. + * Returns -EINVAL if a non-zero tx_coalesce_usecs is out of range, else 0. + */ +static int cdc_ncm_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + u32 new_rx_max = ctx->rx_max; + u32 new_tx_max = ctx->tx_max; + + /* assuming maximum sized dgrams and a single NDP */ + if (ec->rx_max_coalesced_frames) + new_rx_max = ec->rx_max_coalesced_frames * ctx->max_datagram_size; + if (ec->tx_max_coalesced_frames) + new_tx_max = ec->tx_max_coalesced_frames * ctx->max_datagram_size; + + if (ec->tx_coalesce_usecs && + (ec->tx_coalesce_usecs < CDC_NCM_TIMER_INTERVAL_MIN * CDC_NCM_TIMER_PENDING_CNT || + ec->tx_coalesce_usecs > CDC_NCM_TIMER_INTERVAL_MAX * CDC_NCM_TIMER_PENDING_CNT)) + return -EINVAL; + + spin_lock_bh(&ctx->mtx); + /* range checked above, so this cannot wrap a u32 */ +
ctx->timer_interval = ec->tx_coalesce_usecs * (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); + if (!ctx->timer_interval) + ctx->tx_timer_pending = 0; + spin_unlock_bh(&ctx->mtx); + + /* inform device of new values */ + if (new_rx_max != ctx->rx_max || new_tx_max != ctx->tx_max) + cdc_ncm_update_rxtx_max(dev, new_rx_max, new_tx_max); + return 0; +} + +static const struct ethtool_ops cdc_ncm_ethtool_ops = { + .get_settings = usbnet_get_settings, + .set_settings = usbnet_set_settings, + .get_link = usbnet_get_link, + .nway_reset = usbnet_nway_reset, + .get_drvinfo = usbnet_get_drvinfo, + .get_msglevel = usbnet_get_msglevel, + .set_msglevel = usbnet_set_msglevel, + .get_ts_info = ethtool_op_get_ts_info, + .get_coalesce = cdc_ncm_get_coalesce, + .set_coalesce = cdc_ncm_set_coalesce, +}; + /* handle rx_max and tx_max changes */ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) { @@ -257,6 +326,9 @@ static int cdc_ncm_init(struct usbnet *dev) (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; + /* initial coalescing timer interval */ + ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; + return 0; } @@ -596,6 +668,9 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* override ethtool_ops */ + dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; + return 0; error2: @@ -863,7 +938,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_curr_skb = skb_out; goto exit_no_skb; - } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) { + } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; @@ -915,7 +990,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx) /* start timer, if not already started */ if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop))) hrtimer_start(&ctx->tx_timer, -
ktime_set(0, CDC_NCM_TIMER_INTERVAL), + ktime_set(0, ctx->timer_interval), HRTIMER_MODE_REL); } diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 55b6feead93b..5c1066b4dc41 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -72,7 +72,9 @@ /* Restart the timer, if amount of datagrams is less than given value */ #define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3 #define CDC_NCM_TIMER_PENDING_CNT 2 -#define CDC_NCM_TIMER_INTERVAL (400UL * NSEC_PER_USEC) +#define CDC_NCM_TIMER_INTERVAL_USEC 400UL +#define CDC_NCM_TIMER_INTERVAL_MIN 5UL +#define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) /* The following macro defines the minimum header space */ #define CDC_NCM_MIN_HDR_SIZE \ @@ -107,6 +109,8 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + u32 timer_interval; + u32 tx_timer_pending; u32 tx_curr_frame_num; u32 rx_max; -- 2.39.5