tcp: refine TSO splits

author Eric Dumazet <edumazet@google.com>

Fri, 13 Dec 2013 21:51:23 +0000 (13:51 -0800)

committer David S. Miller <davem@davemloft.net>

Tue, 17 Dec 2013 20:15:25 +0000 (15:15 -0500)
author Eric Dumazet <edumazet@google.com>
Fri, 13 Dec 2013 21:51:23 +0000 (13:51 -0800)
committer David S. Miller <davem@davemloft.net>
Tue, 17 Dec 2013 20:15:25 +0000 (15:15 -0500)
diff --git a/include/net/tcp.h b/include/net/tcp.h

index f7e1ab2139efe7f6208836c1797a2db6fb603749..9cd62bc090553f529f0ff4f435d20ef5713deff5 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -978,13 +978,6 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
  }
  bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
  
-static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
-                                      const struct sk_buff *skb)
-{
-       if (skb->len < mss)
-               tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
-}
-
  static inline void tcp_check_probe_timer(struct sock *sk)
  {
         const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 2a69f42e51cab1b544a8ef7ebe6b7599a59d8934..9e7aec7ee67e9d760e7875cab3f8ab6326580cff 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1384,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk)
         }
  }
  
-/* Returns the portion of skb which can be sent right away without
- * introducing MSS oddities to segment boundaries. In rare cases where
- * mss_now != mss_cache, we will request caller to create a small skb
- * per input skb which could be mostly avoided here (if desired).
- *
- * We explicitly want to create a request for splitting write queue tail
- * to a small skb for Nagle purposes while avoiding unnecessary modulos,
- * thus all the complexity (cwnd_len is always MSS multiple which we
- * return whenever allowed by the other factors). Basically we need the
- * modulo only when the receiver window alone is the limiting factor or
- * when we would be allowed to send the split-due-to-Nagle skb fully.
+/* Minshall's variant of the Nagle send check. */
+static bool tcp_minshall_check(const struct tcp_sock *tp)
+{
+       return after(tp->snd_sml, tp->snd_una) &&
+               !after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Update snd_sml if this skb is under mss
+ * Note that a TSO packet might end with a sub-mss segment
+ * The test is really :
+ * if ((skb->len % mss) != 0)
+ *        tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+ * But we can avoid doing the divide again given we already have
+ *  skb_pcount = skb->len / mss_now, rounded up
   */
-static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-                                       unsigned int mss_now, unsigned int max_segs)
+static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+                               const struct sk_buff *skb)
+{
+       if (skb->len < tcp_skb_pcount(skb) * mss_now)
+               tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+}
+
+/* Return false, if packet can be sent now without violating Nagle's rules:
+ * 1. It is full sized. (provided by caller in %partial bool)
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
+ */
+static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
+                           unsigned int mss_now, int nonagle)
+{
+       return partial &&
+               ((nonagle & TCP_NAGLE_CORK) ||
+                (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
+}
+/* Returns the portion of skb which can be sent right away */
+static unsigned int tcp_mss_split_point(const struct sock *sk,
+                                       const struct sk_buff *skb,
+                                       unsigned int mss_now,
+                                       unsigned int max_segs,
+                                       int nonagle)
  {
         const struct tcp_sock *tp = tcp_sk(sk);
-       u32 needed, window, max_len;
+       u32 partial, needed, window, max_len;
  
         window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
         max_len = mss_now * max_segs;
@@ -1413,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
         if (max_len <= needed)
                 return max_len;
  
-       return needed - needed % mss_now;
+       partial = needed % mss_now;
+       /* If last segment is not a full MSS, check if Nagle rules allow us
+        * to include this last segment in this skb.
+        * Otherwise, we'll split the skb at last MSS boundary
+        */
+       if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
+               return needed - partial;
+
+       return needed;
  }
  
  /* Can at least one segment of SKB be sent right now, according to the
@@ -1453,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
         return tso_segs;
  }
  
-/* Minshall's variant of the Nagle send check. */
-static inline bool tcp_minshall_check(const struct tcp_sock *tp)
-{
-       return after(tp->snd_sml, tp->snd_una) &&
-               !after(tp->snd_sml, tp->snd_nxt);
-}
-
-/* Return false, if packet can be sent now without violation Nagle's rules:
- * 1. It is full sized.
- * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
- * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
- *    With Minshall's modification: all sent small packets are ACKed.
- */
-static inline bool tcp_nagle_check(const struct tcp_sock *tp,
-                                 const struct sk_buff *skb,
-                                 unsigned int mss_now, int nonagle)
-{
-       return skb->len < mss_now &&
-               ((nonagle & TCP_NAGLE_CORK) ||
-                (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
-}
  
  /* Return true if the Nagle test allows this packet to be
   * sent now.
@@ -1495,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
         if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                 return true;
  
-       if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+       if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
                 return true;
  
         return false;
@@ -1898,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                         limit = tcp_mss_split_point(sk, skb, mss_now,
                                                     min_t(unsigned int,
                                                           cwnd_quota,
-                                                         sk->sk_gso_max_segs));
+                                                         sk->sk_gso_max_segs),
+                                                   nonagle);
  
                 if (skb->len > limit &&
                     unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
author	Eric Dumazet <edumazet@google.com>
	Fri, 13 Dec 2013 21:51:23 +0000 (13:51 -0800)
committer	David S. Miller <davem@davemloft.net>
	Tue, 17 Dec 2013 20:15:25 +0000 (15:15 -0500)
include/net/tcp.h		patch \| blob \| history
net/ipv4/tcp_output.c		patch \| blob \| history