tcp/dccp: block BH for SYN processing

[karo-tx-linux.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 41dcbd568cbe2403f2a9e659669afe462a42e228..39c393cc0fd3c17130cd5d8d8b37f31ad3aeafd9 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,7 +79,7 @@
  int sysctl_tcp_timestamps __read_mostly = 1;
  int sysctl_tcp_window_scaling __read_mostly = 1;
  int sysctl_tcp_sack __read_mostly = 1;
-int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly;
  int sysctl_tcp_max_reordering __read_mostly = 300;
  int sysctl_tcp_dsack __read_mostly = 1;
  int sysctl_tcp_app_win __read_mostly = 31;
@@ -95,9 +95,6 @@ int sysctl_tcp_rfc1337 __read_mostly;
  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
  int sysctl_tcp_frto __read_mostly = 2;
  int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-
-int sysctl_tcp_thin_dupack __read_mostly;
-
  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
  int sysctl_tcp_early_retrans __read_mostly = 3;
  int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
@@ -904,8 +901,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
                 tcp_disable_fack(tp);
         }
  
-       if (metric > 0)
-               tcp_disable_early_retrans(tp);
         tp->rack.reord = 1;
  }
  
@@ -916,10 +911,6 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
             before(TCP_SKB_CB(skb)->seq,
                    TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
                 tp->retransmit_skb_hint = skb;
-
-       if (!tp->lost_out ||
-           after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
-               tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
  }
  
  /* Sum the number of packets on the wire we have marked as lost.
@@ -1135,6 +1126,7 @@ struct tcp_sacktag_state {
          */
         struct skb_mstamp first_sackt;
         struct skb_mstamp last_sackt;
+       struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
         struct rate_sample *rate;
         int     flag;
  };
@@ -1217,7 +1209,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
                 return sacked;
  
         if (!(sacked & TCPCB_SACKED_ACKED)) {
-               tcp_rack_advance(tp, xmit_time, sacked);
+               tcp_rack_advance(tp, sacked, end_seq,
+                                xmit_time, &state->ack_time);
  
                 if (sacked & TCPCB_SACKED_RETRANS) {
                         /* If the segment is not tagged as lost,
@@ -1937,7 +1930,6 @@ void tcp_enter_loss(struct sock *sk)
         struct tcp_sock *tp = tcp_sk(sk);
         struct net *net = sock_net(sk);
         struct sk_buff *skb;
-       bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
         bool is_reneg;                  /* is receiver reneging on SACKs? */
         bool mark_lost;
  
@@ -1982,7 +1974,6 @@ void tcp_enter_loss(struct sock *sk)
                         TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                         TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                         tp->lost_out += tcp_skb_pcount(skb);
-                       tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
                 }
         }
         tcp_verify_left_out(tp);
@@ -1998,13 +1989,15 @@ void tcp_enter_loss(struct sock *sk)
         tp->high_seq = tp->snd_nxt;
         tcp_ecn_queue_cwr(tp);
  
-       /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
-        * loss recovery is underway except recurring timeout(s) on
-        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+       /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO
+        * if a previous recovery is underway, otherwise it may incorrectly
+        * call a timeout spurious if some previously retransmitted packets
+        * are s/acked (sec 3.2). We do not apply that retriction since
+        * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS
+        * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO
+        * on PTMU discovery to avoid sending new data.
          */
-       tp->frto = sysctl_tcp_frto &&
-                  (new_recovery || icsk->icsk_retransmits) &&
-                  !inet_csk(sk)->icsk_mtup.probe_size;
+       tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size;
  }
  
  /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2056,30 +2049,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
         return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
  }
  
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       unsigned long delay;
-
-       /* Delay early retransmit and entering fast recovery for
-        * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
-        * available, or RTO is scheduled to fire first.
-        */
-       if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-           (flag & FLAG_ECE) || !tp->srtt_us)
-               return false;
-
-       delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
-                   msecs_to_jiffies(2));
-
-       if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
-               return false;
-
-       inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
-                                 TCP_RTO_MAX);
-       return true;
-}
-
  /* Linux NewReno/SACK/FACK/ECN state machine.
   * --------------------------------------
   *
@@ -2127,10 +2096,26 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
   *     F.e. after RTO, when all the queue is considered as lost,
   *     lost_out = packets_out and in_flight = retrans_out.
   *
- *             Essentially, we have now two algorithms counting
+ *             Essentially, we have now a few algorithms detecting
   *             lost packets.
   *
- *             FACK: It is the simplest heuristics. As soon as we decided
+ *             If the receiver supports SACK:
+ *
+ *             RFC6675/3517: It is the conventional algorithm. A packet is
+ *             considered lost if the number of higher sequence packets
+ *             SACKed is greater than or equal the DUPACK thoreshold
+ *             (reordering). This is implemented in tcp_mark_head_lost and
+ *             tcp_update_scoreboard.
+ *
+ *             RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
+ *             (2017-) that checks timing instead of counting DUPACKs.
+ *             Essentially a packet is considered lost if it's not S/ACKed
+ *             after RTT + reordering_window, where both metrics are
+ *             dynamically measured and adjusted. This is implemented in
+ *             tcp_rack_mark_lost.
+ *
+ *             FACK (Disabled by default. Subsumbed by RACK):
+ *             It is the simplest heuristics. As soon as we decided
   *             that something is lost, we decide that _all_ not SACKed
   *             packets until the most forward SACK are lost. I.e.
   *             lost_out = fackets_out - sacked_out and left_out = fackets_out.
@@ -2139,16 +2124,14 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
   *             takes place. We use FACK by default until reordering
   *             is suspected on the path to this destination.
   *
- *             NewReno: when Recovery is entered, we assume that one segment
+ *             If the receiver does not support SACK:
+ *
+ *             NewReno (RFC6582): in Recovery we assume that one segment
   *             is lost (classic Reno). While we are in Recovery and
   *             a partial ACK arrives, we assume that one more packet
   *             is lost (NewReno). This heuristics are the same in NewReno
   *             and SACK.
   *
- *  Imagine, that's all! Forget about all this shamanism about CWND inflation
- *  deflation etc. CWND is real congestion window, never inflated, changes
- *  only according to classic VJ rules.
- *
   * Really tricky (and requiring careful tuning) part of algorithm
   * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
   * The first determines the moment _when_ we should reduce CWND and,
@@ -2176,8 +2159,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
  static bool tcp_time_to_recover(struct sock *sk, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       __u32 packets_out;
-       int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
  
         /* Trick#1: The loss is proven. */
         if (tp->lost_out)
@@ -2187,39 +2168,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
         if (tcp_dupack_heuristics(tp) > tp->reordering)
                 return true;
  
-       /* Trick#4: It is still not OK... But will it be useful to delay
-        * recovery more?
-        */
-       packets_out = tp->packets_out;
-       if (packets_out <= tp->reordering &&
-           tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
-           !tcp_may_send_now(sk)) {
-               /* We have nothing to send. This connection is limited
-                * either by receiver window or by application.
-                */
-               return true;
-       }
-
-       /* If a thin stream is detected, retransmit after first
-        * received dupack. Employ only if SACK is supported in order
-        * to avoid possible corner-case series of spurious retransmissions
-        * Use only if there are no unsent data.
-        */
-       if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
-           tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
-           tcp_is_sack(tp) && !tcp_send_head(sk))
-               return true;
-
-       /* Trick#6: TCP early retransmit, per RFC5827.  To avoid spurious
-        * retransmissions due to small network reorderings, we implement
-        * Mitigation A.3 in the RFC and delay the retransmission for a short
-        * interval if appropriate.
-        */
-       if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-           (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
-           !tcp_may_send_now(sk))
-               return !tcp_pause_early_retransmit(sk, flag);
-
         return false;
  }
  
@@ -2521,8 +2469,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
         tcp_ecn_queue_cwr(tp);
  }
  
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
-                              int flag)
+void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
@@ -2690,7 +2637,7 @@ void tcp_simple_retransmit(struct sock *sk)
  }
  EXPORT_SYMBOL(tcp_simple_retransmit);
  
-static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
+void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         int mib_idx;
@@ -2726,14 +2673,18 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
             tcp_try_undo_loss(sk, false))
                 return;
  
-       if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
-               /* Step 3.b. A timeout is spurious if not all data are
-                * lost, i.e., never-retransmitted data are (s)acked.
-                */
-               if ((flag & FLAG_ORIG_SACK_ACKED) &&
-                   tcp_try_undo_loss(sk, true))
-                       return;
+       /* The ACK (s)acks some never-retransmitted data meaning not all
+        * the data packets before the timeout were lost. Therefore we
+        * undo the congestion window and state. This is essentially
+        * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
+        * a retransmitted skb is permantly marked, we can apply such an
+        * operation even if F-RTO was not used.
+        */
+       if ((flag & FLAG_ORIG_SACK_ACKED) &&
+           tcp_try_undo_loss(sk, tp->undo_marker))
+               return;
  
+       if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
                 if (after(tp->snd_nxt, tp->high_seq)) {
                         if (flag & FLAG_DATA_SACKED || is_dupack)
                                 tp->frto = 0; /* Step 3.a. loss was real */
@@ -2800,6 +2751,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
         return false;
  }
  
+static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
+                                  const struct skb_mstamp *ack_time)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       /* Use RACK to detect loss */
+       if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+               u32 prior_retrans = tp->retrans_out;
+
+               tcp_rack_mark_lost(sk, ack_time);
+               if (prior_retrans > tp->retrans_out)
+                       *ack_flag |= FLAG_LOST_RETRANS;
+       }
+}
+
  /* Process an event, which can update packets-in-flight not trivially.
   * Main goal of this function is to calculate new estimate for left_out,
   * taking into account both packets sitting in receiver's buffer and
@@ -2813,7 +2779,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
   * tcp_xmit_retransmit_queue().
   */
  static void tcp_fastretrans_alert(struct sock *sk, const int acked,
-                                 bool is_dupack, int *ack_flag, int *rexmit)
+                                 bool is_dupack, int *ack_flag, int *rexmit,
+                                 const struct skb_mstamp *ack_time)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -2864,13 +2831,6 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                 }
         }
  
-       /* Use RACK to detect loss */
-       if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
-           tcp_rack_mark_lost(sk)) {
-               flag |= FLAG_LOST_RETRANS;
-               *ack_flag |= FLAG_LOST_RETRANS;
-       }
-
         /* E. Process state. */
         switch (icsk->icsk_ca_state) {
         case TCP_CA_Recovery:
@@ -2888,11 +2848,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                         tcp_try_keep_open(sk);
                         return;
                 }
+               tcp_rack_identify_loss(sk, ack_flag, ack_time);
                 break;
         case TCP_CA_Loss:
                 tcp_process_loss(sk, flag, is_dupack, rexmit);
-               if (icsk->icsk_ca_state != TCP_CA_Open &&
-                   !(flag & FLAG_LOST_RETRANS))
+               tcp_rack_identify_loss(sk, ack_flag, ack_time);
+               if (!(icsk->icsk_ca_state == TCP_CA_Open ||
+                     (*ack_flag & FLAG_LOST_RETRANS)))
                         return;
                 /* Change state if cwnd is undone or retransmits are lost */
         default:
@@ -2906,6 +2868,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
                         tcp_try_undo_dsack(sk);
  
+               tcp_rack_identify_loss(sk, ack_flag, ack_time);
                 if (!tcp_time_to_recover(sk, flag)) {
                         tcp_try_to_open(sk, flag);
                         return;
@@ -3024,7 +2987,7 @@ void tcp_rearm_rto(struct sock *sk)
         } else {
                 u32 rto = inet_csk(sk)->icsk_rto;
                 /* Offset the time elapsed after installing regular RTO */
-               if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+               if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
                     icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                         struct sk_buff *skb = tcp_write_queue_head(sk);
                         const u32 rto_time_stamp =
@@ -3041,24 +3004,6 @@ void tcp_rearm_rto(struct sock *sk)
         }
  }
  
-/* This function is called when the delayed ER timer fires. TCP enters
- * fast recovery and performs fast-retransmit.
- */
-void tcp_resume_early_retransmit(struct sock *sk)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       tcp_rearm_rto(sk);
-
-       /* Stop if ER is disabled after the delayed ER timer is scheduled */
-       if (!tp->do_early_retrans)
-               return;
-
-       tcp_enter_recovery(sk, false);
-       tcp_update_scoreboard(sk, 1);
-       tcp_xmit_retransmit_queue(sk);
-}
-
  /* If we get here, the whole TSO packet has not been acked. */
  static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  {
@@ -3101,11 +3046,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
   */
  static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                                u32 prior_snd_una, int *acked,
-                              struct tcp_sacktag_state *sack,
-                              struct skb_mstamp *now)
+                              struct tcp_sacktag_state *sack)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct skb_mstamp first_ackt, last_ackt;
+       struct skb_mstamp *now = &sack->ack_time;
         struct tcp_sock *tp = tcp_sk(sk);
         u32 prior_sacked = tp->sacked_out;
         u32 reord = tp->packets_out;
@@ -3165,7 +3110,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 } else if (tcp_is_sack(tp)) {
                         tp->delivered += acked_pcount;
                         if (!tcp_skb_spurious_retrans(tp, skb))
-                               tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+                               tcp_rack_advance(tp, sacked, scb->end_seq,
+                                                &skb->skb_mstamp,
+                                                &sack->ack_time);
                 }
                 if (sacked & TCPCB_LOST)
                         tp->lost_out -= acked_pcount;
@@ -3595,7 +3542,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         u32 lost = tp->lost;
         int acked = 0; /* Number of packets newly acked */
         int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
-       struct skb_mstamp now;
  
         sack_state.first_sackt.v64 = 0;
         sack_state.rate = &rs;
@@ -3621,10 +3567,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         if (after(ack, tp->snd_nxt))
                 goto invalid_ack;
  
-       skb_mstamp_get(&now);
+       skb_mstamp_get(&sack_state.ack_time);
  
-       if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-           icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+       if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                 tcp_rearm_rto(sk);
  
         if (after(ack, prior_snd_una)) {
@@ -3689,34 +3634,34 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  
         /* See if we can take anything off of the retransmit queue. */
         flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-                                   &sack_state, &now);
+                                   &sack_state);
  
         if (tcp_ack_is_dubious(sk, flag)) {
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
+                                     &sack_state.ack_time);
         }
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
  
-       if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
-               struct dst_entry *dst = __sk_dst_get(sk);
-               if (dst)
-                       dst_confirm(dst);
-       }
+       if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
+               sk_dst_confirm(sk);
  
         if (icsk->icsk_pending == ICSK_TIME_RETRANS)
                 tcp_schedule_loss_probe(sk);
         delivered = tp->delivered - delivered;  /* freshly ACKed or SACKed */
         lost = tp->lost - lost;                 /* freshly marked lost */
-       tcp_rate_gen(sk, delivered, lost, &now, &rs);
-       tcp_cong_control(sk, ack, delivered, flag, &rs);
+       tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
+                    sack_state.rate);
+       tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
         tcp_xmit_recovery(sk, rexmit);
         return 1;
  
  no_queue:
         /* If data was DSACKed, see if we can undo a cwnd reduction. */
         if (flag & FLAG_DSACKING_ACK)
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
+                                     &sack_state.ack_time);
         /* If this ack opens up a zero window, clear backoff.  It was
          * being used to time the probes, and is probably far higher than
          * it needs to be for normal retransmission.
@@ -3737,9 +3682,11 @@ old_ack:
          * If data was DSACKed, see if we can undo a cwnd reduction.
          */
         if (TCP_SKB_CB(skb)->sacked) {
+               skb_mstamp_get(&sack_state.ack_time);
                 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
                                                 &sack_state);
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
+                                     &sack_state.ack_time);
                 tcp_xmit_recovery(sk, rexmit);
         }
  
@@ -4557,6 +4504,7 @@ add_sack:
  end:
         if (skb) {
                 tcp_grow_window(sk, skb);
+               skb_condense(skb);
                 skb_set_owner_r(skb, sk);
         }
  }
@@ -5249,6 +5197,23 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
         return err;
  }
  
+/* Accept RST for rcv_nxt - 1 after a FIN.
+ * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
+ * FIN is sent followed by a RST packet. The RST is sent with the same
+ * sequence number as the FIN, and thus according to RFC 5961 a challenge
+ * ACK should be sent. However, Mac OSX rate limits replies to challenge
+ * ACKs on the closed socket. In addition middleboxes can drop either the
+ * challenge ACK or a subsequent RST.
+ */
+static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
+                       (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
+                                              TCPF_CLOSING));
+}
+
  /* Does PAWS and seqno based validation of an incoming segment, flags will
   * play significant role here.
   */
@@ -5287,20 +5252,25 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
                                                   LINUX_MIB_TCPACKSKIPPEDSEQ,
                                                   &tp->last_oow_ack_time))
                                 tcp_send_dupack(sk, skb);
+               } else if (tcp_reset_check(sk, skb)) {
+                       tcp_reset(sk);
                 }
                 goto discard;
         }
  
         /* Step 2: check RST bit */
         if (th->rst) {
-               /* RFC 5961 3.2 (extend to match against SACK too if available):
-                * If seq num matches RCV.NXT or the right-most SACK block,
+               /* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
+                * FIN and SACK too if available):
+                * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
+                * the right-most SACK block,
                  * then
                  *     RESET the connection
                  * else
                  *     Send a challenge ACK
                  */
-               if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+               if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
+                   tcp_reset_check(sk, skb)) {
                         rst_seq_match = true;
                 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
                         struct tcp_sack_block *sp = &tp->selective_acks[0];
@@ -5916,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 if (th->syn) {
                         if (th->fin)
                                 goto discard;
-                       if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
-                               return 1;
+                       /* It is possible that we process SYN packets from backlog,
+                        * so we need to make sure to disable BH right there.
+                        */
+                       local_bh_disable();
+                       acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+                       local_bh_enable();
  
+                       if (!acceptable)
+                               return 1;
                         consume_skb(skb);
                         return 0;
                 }
@@ -6022,7 +5998,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 break;
  
         case TCP_FIN_WAIT1: {
-               struct dst_entry *dst;
                 int tmo;
  
                 /* If we enter the TCP_FIN_WAIT1 state and we are a
@@ -6049,9 +6024,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 tcp_set_state(sk, TCP_FIN_WAIT2);
                 sk->sk_shutdown |= SEND_SHUTDOWN;
  
-               dst = __sk_dst_get(sk);
-               if (dst)
-                       dst_confirm(dst);
+               sk_dst_confirm(sk);
  
                 if (!sock_flag(sk, SOCK_DEAD)) {
                         /* Wake up lingering close() */
@@ -6363,7 +6336,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                  * timewait bucket, so that all the necessary checks
                  * are made in the function processing timewait state.
                  */
-               if (tcp_death_row.sysctl_tw_recycle) {
+               if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
                         bool strict;
  
                         dst = af_ops->route_req(sk, &fl, req, &strict);
@@ -6377,8 +6350,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                 }
                 /* Kill the following clause, if you dislike this way. */
                 else if (!net->ipv4.sysctl_tcp_syncookies &&
-                        (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                         (sysctl_max_syn_backlog >> 2)) &&
+                        (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+                         (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
                          !tcp_peer_is_proven(req, dst, false,
                                              tmp_opt.saw_tstamp)) {
                         /* Without syncookies last quarter of