]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/netfilter/nf_conntrack_proto_tcp.c
netfilter: nf_conntrack: make sequence number adjustments usuable without NAT
[karo-tx-linux.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_log.h>
32 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
33 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
34
35 /* "Be conservative in what you do,
36     be liberal in what you accept from others."
37     If it's non-zero, we mark only out of window RST segments as INVALID.
       Initial value: 0. */
38 static int nf_ct_tcp_be_liberal __read_mostly = 0;
39
40 /* If it is set to zero, we disable picking up already established
41    connections. Initial value: 1 (picking up is enabled). */
42 static int nf_ct_tcp_loose __read_mostly = 1;
43
44 /* Max number of retransmitted packets without receiving an (acceptable)
45    ACK from the destination. If this number is reached, a shorter timer
46    will be started. Initial value: 3. */
47 static int nf_ct_tcp_max_retrans __read_mostly = 3;
48
49   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
50      closely.  They're more complex. --RR */
51
/*
 * Printable names of the TCP conntrack states. tcp_print_conntrack()
 * indexes this array directly with ct->proto.tcp.state, so the order of
 * the strings has to follow the numeric order of the state values
 * (the same order as the column headers of the transition table).
 */
52 static const char *const tcp_conntrack_names[] = {
53         "NONE",
54         "SYN_SENT",
55         "SYN_RECV",
56         "ESTABLISHED",
57         "FIN_WAIT",
58         "CLOSE_WAIT",
59         "LAST_ACK",
60         "TIME_WAIT",
61         "CLOSE",
62         "SYN_SENT2",
63 };
64
/*
 * Postfix time-unit macros: they are written *after* a number and expand
 * to a multiplication, e.g. "2 MINS" becomes "2 * 60 * HZ". Each unit is
 * defined in terms of the previous one, so every value ends up scaled
 * by HZ. They are not parenthesised and only make sense directly after
 * a numeric literal.
 */
65 #define SECS * HZ
66 #define MINS * 60 SECS
67 #define HOURS * 60 MINS
68 #define DAYS * 24 HOURS
69
/*
 * Initial timeout for each TCP conntrack timeout slot, written with the
 * postfix SECS/MINS/DAYS macros (i.e. as multiples of HZ). Slots that
 * are not listed here are implicitly zero.
 */
70 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
71         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
72         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
73         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
74         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
75         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
76         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
77         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
78         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
79         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
80 /* RFC1122 says the R2 limit should be at least 100 seconds.
81    Linux uses 15 packets as limit, which corresponds
82    to ~13-30min depending on RTO. */
83         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
84         [TCP_CONNTRACK_UNACK]           = 5 MINS,
85 };
86
/*
 * Three-letter aliases for the conntrack states, used to keep the rows of
 * the state transition table short. Two of them are not real states but
 * verdicts stored in the table: sIV (TCP_CONNTRACK_MAX) marks a segment
 * as INVALID and sIG (TCP_CONNTRACK_IGNORE) marks it as IGNORED.
 */
87 #define sNO TCP_CONNTRACK_NONE
88 #define sSS TCP_CONNTRACK_SYN_SENT
89 #define sSR TCP_CONNTRACK_SYN_RECV
90 #define sES TCP_CONNTRACK_ESTABLISHED
91 #define sFW TCP_CONNTRACK_FIN_WAIT
92 #define sCW TCP_CONNTRACK_CLOSE_WAIT
93 #define sLA TCP_CONNTRACK_LAST_ACK
94 #define sTW TCP_CONNTRACK_TIME_WAIT
95 #define sCL TCP_CONNTRACK_CLOSE
96 #define sS2 TCP_CONNTRACK_SYN_SENT2
97 #define sIV TCP_CONNTRACK_MAX
98 #define sIG TCP_CONNTRACK_IGNORE
99
/*
 * Classification of a segment by its RST/SYN/FIN/ACK flags, as computed by
 * get_conntrack_index(). The values are spelled out because they select
 * the row of the state transition table.
 */
enum tcp_bit_set {
        TCP_SYN_SET     = 0,    /* SYN without ACK */
        TCP_SYNACK_SET  = 1,    /* SYN together with ACK */
        TCP_FIN_SET     = 2,    /* FIN */
        TCP_ACK_SET     = 3,    /* ACK only */
        TCP_RST_SET     = 4,    /* RST */
        TCP_NONE_SET    = 5,    /* none of the above */
};
109
110 /*
111  * The TCP state transition table needs a few words...
112  *
113  * We are the man in the middle. All the packets go through us
114  * but might get lost in transit to the destination.
115  * It is assumed that the destinations can't receive segments
116  * we haven't seen.
117  *
118  * The checked segment is in window, but our windows are *not*
119  * equivalent with the ones of the sender/receiver. We always
120  * try to guess the state of the current sender.
121  *
122  * The meaning of the states are:
123  *
124  * NONE:        initial state
125  * SYN_SENT:    SYN-only packet seen
126  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
127  * SYN_RECV:    SYN-ACK packet seen
128  * ESTABLISHED: ACK packet seen
129  * FIN_WAIT:    FIN packet seen
130  * CLOSE_WAIT:  ACK seen (after FIN)
131  * LAST_ACK:    FIN seen (after FIN)
132  * TIME_WAIT:   last ACK seen
133  * CLOSE:       closed connection (RST)
134  *
135  * Packets marked as IGNORED (sIG):
136  *      if they may be either invalid or valid
137  *      and the receiver may send back a connection
138  *      closing RST or a SYN/ACK.
139  *
140  * Packets marked as INVALID (sIV):
141  *      if we regard them as truly invalid packets
142  */
/*
 * State transition table, laid out as [direction][segment type][state]:
 *   - the two outer groups are the ones labelled ORIGINAL and REPLY;
 *   - each group has six rows, one per enum tcp_bit_set value
 *     (syn, synack, fin, ack, rst, none);
 *   - each column is a current state, in the order shown by the
 *     column-header comment above every row.
 * An entry is the state to move to, or sIV / sIG when the segment is
 * regarded as invalid / has to be ignored.
 */
143 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
144         {
145 /* ORIGINAL */
146 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
147 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
148 /*
149  *      sNO -> sSS      Initialize a new connection
150  *      sSS -> sSS      Retransmitted SYN
151  *      sS2 -> sS2      Late retransmitted SYN
152  *      sSR -> sIG
153  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
154  *                      are errors. Receiver will reply with RST
155  *                      and close the connection.
156  *                      Or we are not in sync and hold a dead connection.
157  *      sFW -> sIG
158  *      sCW -> sIG
159  *      sLA -> sIG
160  *      sTW -> sSS      Reopened connection (RFC 1122).
161  *      sCL -> sSS
162  */
163 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
164 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
165 /*
166  *      sNO -> sIV      Too late and no reason to do anything
167  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
168  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
169  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
170  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
171  *      sFW -> sIV
172  *      sCW -> sIV
173  *      sLA -> sIV
174  *      sTW -> sIV
175  *      sCL -> sIV
176  */
177 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
178 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
179 /*
180  *      sNO -> sIV      Too late and no reason to do anything...
181  *      sSS -> sIV      Client might not send FIN in this state:
182  *                      we enforce waiting for a SYN/ACK reply first.
183  *      sS2 -> sIV
184  *      sSR -> sFW      Close started.
185  *      sES -> sFW
186  *      sFW -> sLA      FIN seen in both directions, waiting for
187  *                      the last ACK.
188  *                      Might be a retransmitted FIN as well...
189  *      sCW -> sLA
190  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
191  *      sTW -> sTW
192  *      sCL -> sCL
193  */
194 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
195 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
196 /*
197  *      sNO -> sES      Assumed.
198  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
199  *      sS2 -> sIV
200  *      sSR -> sES      Established state is reached.
201  *      sES -> sES      :-)
202  *      sFW -> sCW      Normal close request answered by ACK.
203  *      sCW -> sCW
204  *      sLA -> sTW      Last ACK detected.
205  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
206  *      sCL -> sCL
207  */
208 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
209 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
210 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
211         },
212         {
213 /* REPLY */
214 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
215 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
216 /*
217  *      sNO -> sIV      Never reached.
218  *      sSS -> sS2      Simultaneous open
219  *      sS2 -> sS2      Retransmitted simultaneous SYN
220  *      sSR -> sIV      Invalid SYN packets sent by the server
221  *      sES -> sIV
222  *      sFW -> sIV
223  *      sCW -> sIV
224  *      sLA -> sIV
225  *      sTW -> sIV      Reopened connection, but server may not do it.
226  *      sCL -> sIV
227  */
228 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
229 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
230 /*
231  *      sSS -> sSR      Standard open.
232  *      sS2 -> sSR      Simultaneous open
233  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
234  *      sES -> sIG      Late retransmitted SYN/ACK?
235  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
236  *      sCW -> sIG
237  *      sLA -> sIG
238  *      sTW -> sIG
239  *      sCL -> sIG
240  */
241 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
242 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
243 /*
244  *      sSS -> sIV      Server might not send FIN in this state.
245  *      sS2 -> sIV
246  *      sSR -> sFW      Close started.
247  *      sES -> sFW
248  *      sFW -> sLA      FIN seen in both directions.
249  *      sCW -> sLA
250  *      sLA -> sLA      Retransmitted FIN.
251  *      sTW -> sTW
252  *      sCL -> sCL
253  */
254 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
255 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
256 /*
257  *      sSS -> sIG      Might be a half-open connection.
258  *      sS2 -> sIG
259  *      sSR -> sSR      Might answer late resent SYN.
260  *      sES -> sES      :-)
261  *      sFW -> sCW      Normal close request answered by ACK.
262  *      sCW -> sCW
263  *      sLA -> sTW      Last ACK detected.
264  *      sTW -> sTW      Retransmitted last ACK.
265  *      sCL -> sCL
266  */
267 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
268 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
269 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
270         }
271 };
272
273 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
274 {
275         return &net->ct.nf_ct_proto.tcp;
276 }
277
278 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
279                              struct nf_conntrack_tuple *tuple)
280 {
281         const struct tcphdr *hp;
282         struct tcphdr _hdr;
283
284         /* Actually only need first 8 bytes. */
285         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
286         if (hp == NULL)
287                 return false;
288
289         tuple->src.u.tcp.port = hp->source;
290         tuple->dst.u.tcp.port = hp->dest;
291
292         return true;
293 }
294
295 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
296                              const struct nf_conntrack_tuple *orig)
297 {
298         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
299         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
300         return true;
301 }
302
303 /* Print out the per-protocol part of the tuple. */
304 static int tcp_print_tuple(struct seq_file *s,
305                            const struct nf_conntrack_tuple *tuple)
306 {
307         return seq_printf(s, "sport=%hu dport=%hu ",
308                           ntohs(tuple->src.u.tcp.port),
309                           ntohs(tuple->dst.u.tcp.port));
310 }
311
312 /* Print out the private part of the conntrack. */
313 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
314 {
315         enum tcp_conntrack state;
316
317         spin_lock_bh(&ct->lock);
318         state = ct->proto.tcp.state;
319         spin_unlock_bh(&ct->lock);
320
321         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
322 }
323
324 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
325 {
326         if (tcph->rst) return TCP_RST_SET;
327         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
328         else if (tcph->fin) return TCP_FIN_SET;
329         else if (tcph->ack) return TCP_ACK_SET;
330         else return TCP_NONE_SET;
331 }
332
333 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
334    in IP Filter' by Guido van Rooij.
335
336    http://www.sane.nl/events/sane2000/papers.html
337    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
338
339    The boundaries and the conditions are changed according to RFC793:
340    the packet must intersect the window (i.e. segments may be
341    after the right or before the left edge) and thus receivers may ACK
342    segments after the right edge of the window.
343
344         td_maxend = max(sack + max(win,1)) seen in reply packets
345         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
346         td_maxwin += seq + len - sender.td_maxend
347                         if seq + len > sender.td_maxend
348         td_end    = max(seq + len) seen in sent packets
349
350    I.   Upper bound for valid data:     seq <= sender.td_maxend
351    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
352    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
353    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
354
355    where sack is the highest right edge of sack block found in the packet
356    or ack in the case of packet without SACK option.
357
358    The upper bound limit for a valid (s)ack is not ignored -
359    we don't have to deal with fragments.
360 */
361
362 static inline __u32 segment_seq_plus_len(__u32 seq,
363                                          size_t len,
364                                          unsigned int dataoff,
365                                          const struct tcphdr *tcph)
366 {
367         /* XXX Should I use payload length field in IP/IPv6 header ?
368          * - YK */
369         return (seq + len - dataoff - tcph->doff*4
370                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
371 }
372
/* Fixme: what about big packets? */
/*
 * MAXACKWINDOW(sender) is the larger of (sender)->td_maxwin and
 * MAXACKWINCONST. Note that the argument is evaluated more than once.
 */
#define MAXACKWINCONST                  66000
#define MAXACKWINDOW(sender)                                            \
        ((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST         \
                                               : (sender)->td_maxwin)
378
379 /*
380  * Simplified tcp_parse_options routine from tcp_input.c
381  */
382 static void tcp_options(const struct sk_buff *skb,
383                         unsigned int dataoff,
384                         const struct tcphdr *tcph,
385                         struct ip_ct_tcp_state *state)
386 {
387         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
388         const unsigned char *ptr;
389         int length = (tcph->doff*4) - sizeof(struct tcphdr);
390
391         if (!length)
392                 return;
393
394         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
395                                  length, buff);
396         BUG_ON(ptr == NULL);
397
398         state->td_scale =
399         state->flags = 0;
400
401         while (length > 0) {
402                 int opcode=*ptr++;
403                 int opsize;
404
405                 switch (opcode) {
406                 case TCPOPT_EOL:
407                         return;
408                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
409                         length--;
410                         continue;
411                 default:
412                         opsize=*ptr++;
413                         if (opsize < 2) /* "silly options" */
414                                 return;
415                         if (opsize > length)
416                                 return; /* don't parse partial options */
417
418                         if (opcode == TCPOPT_SACK_PERM
419                             && opsize == TCPOLEN_SACK_PERM)
420                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
421                         else if (opcode == TCPOPT_WINDOW
422                                  && opsize == TCPOLEN_WINDOW) {
423                                 state->td_scale = *(u_int8_t *)ptr;
424
425                                 if (state->td_scale > 14) {
426                                         /* See RFC1323 */
427                                         state->td_scale = 14;
428                                 }
429                                 state->flags |=
430                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
431                         }
432                         ptr += opsize - 2;
433                         length -= opsize;
434                 }
435         }
436 }
437
438 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
439                      const struct tcphdr *tcph, __u32 *sack)
440 {
441         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
442         const unsigned char *ptr;
443         int length = (tcph->doff*4) - sizeof(struct tcphdr);
444         __u32 tmp;
445
446         if (!length)
447                 return;
448
449         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
450                                  length, buff);
451         BUG_ON(ptr == NULL);
452
453         /* Fast path for timestamp-only option */
454         if (length == TCPOLEN_TSTAMP_ALIGNED
455             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
456                                        | (TCPOPT_NOP << 16)
457                                        | (TCPOPT_TIMESTAMP << 8)
458                                        | TCPOLEN_TIMESTAMP))
459                 return;
460
461         while (length > 0) {
462                 int opcode = *ptr++;
463                 int opsize, i;
464
465                 switch (opcode) {
466                 case TCPOPT_EOL:
467                         return;
468                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
469                         length--;
470                         continue;
471                 default:
472                         opsize = *ptr++;
473                         if (opsize < 2) /* "silly options" */
474                                 return;
475                         if (opsize > length)
476                                 return; /* don't parse partial options */
477
478                         if (opcode == TCPOPT_SACK
479                             && opsize >= (TCPOLEN_SACK_BASE
480                                           + TCPOLEN_SACK_PERBLOCK)
481                             && !((opsize - TCPOLEN_SACK_BASE)
482                                  % TCPOLEN_SACK_PERBLOCK)) {
483                                 for (i = 0;
484                                      i < (opsize - TCPOLEN_SACK_BASE);
485                                      i += TCPOLEN_SACK_PERBLOCK) {
486                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
487
488                                         if (after(tmp, *sack))
489                                                 *sack = tmp;
490                                 }
491                                 return;
492                         }
493                         ptr += opsize - 2;
494                         length -= opsize;
495                 }
496         }
497 }
498
/*
 * Check a segment against the tracked send/receive windows of a connection
 * and update the tracking data.
 *
 * @ct:      conntrack entry; used for the net namespace, the tuple that is
 *           dumped in debug output and the sequence offset lookup
 * @state:   TCP tracking state; seen[dir] is treated as the sender and
 *           seen[!dir] as the receiver of this segment
 * @dir:     direction of the segment
 * @index:   segment class (compared against TCP_ACK_SET for the
 *           retransmission accounting)
 * @skb:     the packet
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    the TCP header
 * @pf:      protocol family, only passed on to nf_log_packet()
 *
 * The segment is accepted when the four conditions I-IV described in the
 * comment above segment_seq_plus_len() hold. In that case the sender and
 * receiver window data are updated and consecutive identical ACK-only
 * segments are counted in state->retrans.
 *
 * Returns true when the segment is in window, when it is the SYN of a
 * simultaneous open seen while initialising the sender data, or when it is
 * out of window but either the sender has IP_CT_TCP_FLAG_BE_LIBERAL set or
 * tn->tcp_be_liberal is non-zero. Otherwise returns false and, if
 * LOG_INVALID() allows it, logs which bound was violated.
 */
499 static bool tcp_in_window(const struct nf_conn *ct,
500                           struct ip_ct_tcp *state,
501                           enum ip_conntrack_dir dir,
502                           unsigned int index,
503                           const struct sk_buff *skb,
504                           unsigned int dataoff,
505                           const struct tcphdr *tcph,
506                           u_int8_t pf)
507 {
508         struct net *net = nf_ct_net(ct);
509         struct nf_tcp_net *tn = tcp_pernet(net);
510         struct ip_ct_tcp_state *sender = &state->seen[dir];
511         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
512         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
513         __u32 seq, ack, sack, end, win, swin;
514         s32 receiver_offset;
515         bool res, in_recv_win;
516
517         /*
518          * Get the required data from the packet.
519          */
520         seq = ntohl(tcph->seq);
521         ack = sack = ntohl(tcph->ack_seq);
522         win = ntohs(tcph->window);
523         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
524
525         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
526                 tcp_sack(skb, dataoff, tcph, &sack);
527
528         /* Take into account NAT sequence number mangling */
529         receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
530         ack -= receiver_offset;
531         sack -= receiver_offset;
532
533         pr_debug("tcp_in_window: START\n");
534         pr_debug("tcp_in_window: ");
535         nf_ct_dump_tuple(tuple);
536         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
537                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
538         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
539                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
540                  sender->td_end, sender->td_maxend, sender->td_maxwin,
541                  sender->td_scale,
542                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
543                  receiver->td_scale);
544
         /* td_maxwin == 0 is used as "no data recorded for this side yet". */
545         if (sender->td_maxwin == 0) {
546                 /*
547                  * Initialize sender data.
548                  */
549                 if (tcph->syn) {
550                         /*
551                          * SYN-ACK in reply to a SYN
552                          * or SYN from reply direction in simultaneous open.
553                          */
554                         sender->td_end =
555                         sender->td_maxend = end;
556                         sender->td_maxwin = (win == 0 ? 1 : win);
557
558                         tcp_options(skb, dataoff, tcph, sender);
559                         /*
560                          * RFC 1323:
561                          * Both sides must send the Window Scale option
562                          * to enable window scaling in either direction.
563                          */
564                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
565                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
566                                 sender->td_scale =
567                                 receiver->td_scale = 0;
568                         if (!tcph->ack)
569                                 /* Simultaneous open */
570                                 return true;
571                 } else {
572                         /*
573                          * We are in the middle of a connection,
574                          * its history is lost for us.
575                          * Let's try to use the data from the packet.
576                          */
577                         sender->td_end = end;
578                         swin = win << sender->td_scale;
579                         sender->td_maxwin = (swin == 0 ? 1 : swin);
580                         sender->td_maxend = end + sender->td_maxwin;
581                         /*
582                          * We haven't seen traffic in the other direction yet
583                          * but we have to tweak window tracking to pass III
584                          * and IV until that happens.
585                          */
586                         if (receiver->td_maxwin == 0)
587                                 receiver->td_end = receiver->td_maxend = sack;
588                 }
589         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
590                      && dir == IP_CT_DIR_ORIGINAL)
591                    || (state->state == TCP_CONNTRACK_SYN_RECV
592                      && dir == IP_CT_DIR_REPLY))
593                    && after(end, sender->td_end)) {
594                 /*
595                  * RFC 793: "if a TCP is reinitialized ... then it need
596                  * not wait at all; it must only be sure to use sequence
597                  * numbers larger than those recently used."
598                  */
599                 sender->td_end =
600                 sender->td_maxend = end;
601                 sender->td_maxwin = (win == 0 ? 1 : win);
602
603                 tcp_options(skb, dataoff, tcph, sender);
604         }
605
606         if (!(tcph->ack)) {
607                 /*
608                  * If there is no ACK, just pretend it was set and OK.
609                  */
610                 ack = sack = receiver->td_end;
611         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
612                     (TCP_FLAG_ACK|TCP_FLAG_RST))
613                    && (ack == 0)) {
614                 /*
615                  * Broken TCP stacks, that set ACK in RST packets as well
616                  * with zero ack value.
617                  */
618                 ack = sack = receiver->td_end;
619         }
620
621         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
622                 /*
623                  * RST sent answering SYN.
624                  */
625                 seq = end = sender->td_end;
626
627         pr_debug("tcp_in_window: ");
628         nf_ct_dump_tuple(tuple);
629         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
630                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
631         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
632                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
633                  sender->td_end, sender->td_maxend, sender->td_maxwin,
634                  sender->td_scale,
635                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
636                  receiver->td_scale);
637
638         /* Is the ending sequence in the receive window (if available)? */
639         in_recv_win = !receiver->td_maxwin ||
640                       after(end, sender->td_end - receiver->td_maxwin - 1);
641
642         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
643                  before(seq, sender->td_maxend + 1),
644                  (in_recv_win ? 1 : 0),
645                  before(sack, receiver->td_end + 1),
646                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
647
         /* Conditions I, II, III and IV, in this order. */
648         if (before(seq, sender->td_maxend + 1) &&
649             in_recv_win &&
650             before(sack, receiver->td_end + 1) &&
651             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
652                 /*
653                  * Take into account window scaling (RFC 1323).
654                  */
655                 if (!tcph->syn)
656                         win <<= sender->td_scale;
657
658                 /*
659                  * Update sender data.
660                  */
661                 swin = win + (sack - ack);
662                 if (sender->td_maxwin < swin)
663                         sender->td_maxwin = swin;
664                 if (after(end, sender->td_end)) {
665                         sender->td_end = end;
666                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
667                 }
668                 if (tcph->ack) {
669                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
670                                 sender->td_maxack = ack;
671                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
672                         } else if (after(ack, sender->td_maxack))
673                                 sender->td_maxack = ack;
674                 }
675
676                 /*
677                  * Update receiver data.
678                  */
679                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
680                         receiver->td_maxwin += end - sender->td_maxend;
681                 if (after(sack + win, receiver->td_maxend - 1)) {
682                         receiver->td_maxend = sack + win;
683                         if (win == 0)
684                                 receiver->td_maxend++;
685                 }
686                 if (ack == receiver->td_end)
687                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
688
689                 /*
690                  * Check retransmissions.
691                  */
692                 if (index == TCP_ACK_SET) {
693                         if (state->last_dir == dir
694                             && state->last_seq == seq
695                             && state->last_ack == ack
696                             && state->last_end == end
697                             && state->last_win == win)
698                                 state->retrans++;
699                         else {
700                                 state->last_dir = dir;
701                                 state->last_seq = seq;
702                                 state->last_ack = ack;
703                                 state->last_end = end;
704                                 state->last_win = win;
705                                 state->retrans = 0;
706                         }
707                 }
708                 res = true;
709         } else {
710                 res = false;
711                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
712                     tn->tcp_be_liberal)
713                         res = true;
                 /*
                  * The nested conditional below re-evaluates I-IV in order
                  * and picks the message of the first condition that failed.
                  */
714                 if (!res && LOG_INVALID(net, IPPROTO_TCP))
715                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
716                         "nf_ct_tcp: %s ",
717                         before(seq, sender->td_maxend + 1) ?
718                         in_recv_win ?
719                         before(sack, receiver->td_end + 1) ?
720                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
721                         : "ACK is under the lower bound (possible overly delayed ACK)"
722                         : "ACK is over the upper bound (ACKed data not seen yet)"
723                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
724                         : "SEQ is over the upper bound (over the window of the receiver)");
725         }
726
727         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
728                  "receiver end=%u maxend=%u maxwin=%u\n",
729                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
730                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
731
732         return res;
733 }
734
735 /* table of valid flag combinations - PUSH, ECE and CWR are always valid.
    *
    * The array is sized to be indexable by any OR-combination of the
    * FIN, SYN, RST, ACK and URG flag bits. A listed combination has the
    * value 1; every combination that is not listed is implicitly 0.
    */
736 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
737                                  TCPHDR_URG) + 1] =
738 {
739         [TCPHDR_SYN]                            = 1,
740         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
741         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
742         [TCPHDR_RST]                            = 1,
743         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
744         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
745         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
746         [TCPHDR_ACK]                            = 1,
747         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
748 };
749
750 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
751 static int tcp_error(struct net *net, struct nf_conn *tmpl,
752                      struct sk_buff *skb,
753                      unsigned int dataoff,
754                      enum ip_conntrack_info *ctinfo,
755                      u_int8_t pf,
756                      unsigned int hooknum)
757 {
758         const struct tcphdr *th;
759         struct tcphdr _tcph;
760         unsigned int tcplen = skb->len - dataoff;
761         u_int8_t tcpflags;
762
763         /* Smaller that minimal TCP header? */
764         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
765         if (th == NULL) {
766                 if (LOG_INVALID(net, IPPROTO_TCP))
767                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
768                                 "nf_ct_tcp: short packet ");
769                 return -NF_ACCEPT;
770         }
771
772         /* Not whole TCP header or malformed packet */
773         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
774                 if (LOG_INVALID(net, IPPROTO_TCP))
775                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
776                                 "nf_ct_tcp: truncated/malformed packet ");
777                 return -NF_ACCEPT;
778         }
779
780         /* Checksum invalid? Ignore.
781          * We skip checking packets on the outgoing path
782          * because the checksum is assumed to be correct.
783          */
784         /* FIXME: Source route IP option packets --RR */
785         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
786             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
787                 if (LOG_INVALID(net, IPPROTO_TCP))
788                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
789                                   "nf_ct_tcp: bad TCP checksum ");
790                 return -NF_ACCEPT;
791         }
792
793         /* Check TCP flags. */
794         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
795         if (!tcp_valid_flags[tcpflags]) {
796                 if (LOG_INVALID(net, IPPROTO_TCP))
797                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
798                                   "nf_ct_tcp: invalid TCP flag combination ");
799                 return -NF_ACCEPT;
800         }
801
802         return NF_ACCEPT;
803 }
804
805 static unsigned int *tcp_get_timeouts(struct net *net)
806 {
807         return tcp_pernet(net)->timeouts;
808 }
809
810 /* Returns verdict for packet, or -1 for invalid.
     *
     * Drives the per-connection TCP state machine for one packet.  The
     * candidate new state is looked up in tcp_conntracks[] from the packet
     * direction, the flag index of the segment and the current state, all
     * under ct->lock.
     *
     * Return values produced below:
     *   NF_ACCEPT   - packet accepted (state updated, or packet deliberately
     *                 ignored while we may be out of sync);
     *   -NF_ACCEPT  - packet considered invalid (bad transition, invalid RST
     *                 or tcp_in_window() rejected it);
     *   -NF_REPEAT  - an old entry was killed so that a reopening SYN can be
     *                 looked up again;
     *   NF_DROP     - the old entry could not be killed by nf_ct_kill().
     *
     * The lock is released on every return path; event caching and the
     * timeout refresh at the end happen after it has been dropped.
     */
811 static int tcp_packet(struct nf_conn *ct,
812                       const struct sk_buff *skb,
813                       unsigned int dataoff,
814                       enum ip_conntrack_info ctinfo,
815                       u_int8_t pf,
816                       unsigned int hooknum,
817                       unsigned int *timeouts)
818 {
819         struct net *net = nf_ct_net(ct);
820         struct nf_tcp_net *tn = tcp_pernet(net);
821         struct nf_conntrack_tuple *tuple;
822         enum tcp_conntrack new_state, old_state;
823         enum ip_conntrack_dir dir;
824         const struct tcphdr *th;
825         struct tcphdr _tcph;
826         unsigned long timeout;
827         unsigned int index;
828
            /* NOTE(review): a missing header is treated as a bug here, so the
             * header is presumably validated before this point (tcp_error()
             * performs the same lookup) - confirm against the caller. */
829         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
830         BUG_ON(th == NULL);
831
832         spin_lock_bh(&ct->lock);
833         old_state = ct->proto.tcp.state;
834         dir = CTINFO2DIR(ctinfo);
835         index = get_conntrack_index(th);
            /* Candidate transition for (direction, segment type, current state). */
836         new_state = tcp_conntracks[dir][index][old_state];
837         tuple = &ct->tuplehash[dir].tuple;
838
839         switch (new_state) {
840         case TCP_CONNTRACK_SYN_SENT:
                    /* States before TIME_WAIT need no reopen handling. */
841                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
842                         break;
843                 /* RFC 1122: "When a connection is closed actively,
844                  * it MUST linger in TIME-WAIT state for a time 2xMSL
845                  * (Maximum Segment Lifetime). However, it MAY accept
846                  * a new SYN from the remote TCP to reopen the connection
847                  * directly from TIME-WAIT state, if..."
848                  * We ignore the conditions because we are in the
849                  * TIME-WAIT state anyway.
850                  *
851                  * Handle aborted connections: we and the server
852                  * think there is an existing connection but the client
853                  * aborts it and starts a new one.
854                  */
855                 if (((ct->proto.tcp.seen[dir].flags
856                       | ct->proto.tcp.seen[!dir].flags)
857                      & IP_CT_TCP_FLAG_CLOSE_INIT)
858                     || (ct->proto.tcp.last_dir == dir
859                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
860                         /* Attempt to reopen a closed/aborted connection.
861                          * Delete this connection and look up again. */
862                         spin_unlock_bh(&ct->lock);
863
864                         /* Only repeat if we can actually remove the timer.
865                          * Destruction may already be in progress in process
866                          * context and we must give it a chance to terminate.
867                          */
868                         if (nf_ct_kill(ct))
869                                 return -NF_REPEAT;
870                         return NF_DROP;
871                 }
872                 /* Fall through */
873         case TCP_CONNTRACK_IGNORE:
874                 /* Ignored packets:
875                  *
876                  * Our connection entry may be out of sync, so ignore
877                  * packets which may signal the real connection between
878                  * the client and the server.
879                  *
880                  * a) SYN in ORIGINAL
881                  * b) SYN/ACK in REPLY
882                  * c) ACK in reply direction after initial SYN in original.
883                  *
884                  * If the ignored packet is invalid, the receiver will send
885                  * a RST we'll catch below.
886                  */
887                 if (index == TCP_SYNACK_SET
888                     && ct->proto.tcp.last_index == TCP_SYN_SET
889                     && ct->proto.tcp.last_dir != dir
890                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
891                         /* b) This SYN/ACK acknowledges a SYN that we earlier
892                          * ignored as invalid. This means that the client and
893                          * the server are both in sync, while the firewall is
894                          * not. We get in sync from the previously annotated
895                          * values.
896                          */
897                         old_state = TCP_CONNTRACK_SYN_SENT;
898                         new_state = TCP_CONNTRACK_SYN_RECV;
899                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
900                                 ct->proto.tcp.last_end;
901                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
902                                 ct->proto.tcp.last_end;
903                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
904                                 ct->proto.tcp.last_win == 0 ?
905                                         1 : ct->proto.tcp.last_win;
906                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
907                                 ct->proto.tcp.last_wscale;
908                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
909                                 ct->proto.tcp.last_flags;
910                         memset(&ct->proto.tcp.seen[dir], 0,
911                                sizeof(struct ip_ct_tcp_state));
912                         break;
913                 }
                    /* Remember what the ignored packet looked like so that a
                     * later SYN/ACK or RST can be matched against it. */
914                 ct->proto.tcp.last_index = index;
915                 ct->proto.tcp.last_dir = dir;
916                 ct->proto.tcp.last_seq = ntohl(th->seq);
917                 ct->proto.tcp.last_end =
918                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
919                 ct->proto.tcp.last_win = ntohs(th->window);
920
921                 /* a) This is a SYN in ORIGINAL. The client and the server
922                  * may be in sync but we are not. In that case, we annotate
923                  * the TCP options and let the packet go through. If it is a
924                  * valid SYN packet, the server will reply with a SYN/ACK, and
925                  * then we'll get in sync. Otherwise, the server ignores it. */
926                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
927                         struct ip_ct_tcp_state seen = {};
928
929                         ct->proto.tcp.last_flags =
930                         ct->proto.tcp.last_wscale = 0;
931                         tcp_options(skb, dataoff, th, &seen);
932                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
933                                 ct->proto.tcp.last_flags |=
934                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
935                                 ct->proto.tcp.last_wscale = seen.td_scale;
936                         }
937                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
938                                 ct->proto.tcp.last_flags |=
939                                         IP_CT_TCP_FLAG_SACK_PERM;
940                         }
941                 }
942                 spin_unlock_bh(&ct->lock);
943                 if (LOG_INVALID(net, IPPROTO_TCP))
944                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
945                                   "nf_ct_tcp: invalid packet ignored in "
946                                   "state %s ", tcp_conntrack_names[old_state]);
947                 return NF_ACCEPT;
948         case TCP_CONNTRACK_MAX:
949                 /* Invalid packet */
950                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
951                          dir, get_conntrack_index(th), old_state);
952                 spin_unlock_bh(&ct->lock);
953                 if (LOG_INVALID(net, IPPROTO_TCP))
954                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
955                                   "nf_ct_tcp: invalid state ");
956                 return -NF_ACCEPT;
957         case TCP_CONNTRACK_CLOSE:
                    /* A RST whose sequence number lies before the highest ACK
                     * recorded for the other direction is rejected. */
958                 if (index == TCP_RST_SET
959                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
960                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
961                         /* Invalid RST  */
962                         spin_unlock_bh(&ct->lock);
963                         if (LOG_INVALID(net, IPPROTO_TCP))
964                                 nf_log_packet(net, pf, 0, skb, NULL, NULL,
965                                               NULL, "nf_ct_tcp: invalid RST ");
966                         return -NF_ACCEPT;
967                 }
968                 if (index == TCP_RST_SET
969                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
970                          && ct->proto.tcp.last_index == TCP_SYN_SET)
971                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
972                             && ct->proto.tcp.last_index == TCP_ACK_SET))
973                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
974                         /* RST sent to invalid SYN or ACK we had let through
975                          * at a) and c) above:
976                          *
977                          * a) SYN was in window then
978                          * c) we hold a half-open connection.
979                          *
980                          * Delete our connection entry.
981                          * We skip window checking, because packet might ACK
982                          * segments we ignored. */
983                         goto in_window;
984                 }
985                 /* Just fall through */
986         default:
987                 /* Keep compilers happy. */
988                 break;
989         }
990
            /* Every packet that reaches this point must pass the window check. */
991         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
992                            skb, dataoff, th, pf)) {
993                 spin_unlock_bh(&ct->lock);
994                 return -NF_ACCEPT;
995         }
996      in_window:
997         /* From now on we have got in-window packets */
998         ct->proto.tcp.last_index = index;
999         ct->proto.tcp.last_dir = dir;
1000
1001         pr_debug("tcp_conntracks: ");
1002         nf_ct_dump_tuple(tuple);
1003         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1004                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1005                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1006                  old_state, new_state);
1007
1008         ct->proto.tcp.state = new_state;
             /* Record which direction initiated the close. */
1009         if (old_state != new_state
1010             && new_state == TCP_CONNTRACK_FIN_WAIT)
1011                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1012
             /* Timeout selection: once the retransmission counter has reached
              * tcp_max_retrans use the RETRANS timeout, else if either
              * direction has unacknowledged data use the UNACK timeout - in
              * both cases only when that is shorter than the timeout of the
              * new state.  Otherwise use the timeout of the new state. */
1013         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1014             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1015                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1016         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1017                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1018                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1019                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1020         else
1021                 timeout = timeouts[new_state];
1022         spin_unlock_bh(&ct->lock);
1023
             /* ct->lock has been dropped; report a state change, if any. */
1024         if (new_state != old_state)
1025                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1026
1027         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1028                 /* If only reply is a RST, we can consider ourselves not to
1029                    have an established connection: this is a fairly common
1030                    problem case, so we can delete the conntrack
1031                    immediately.  --RR */
1032                 if (th->rst) {
1033                         nf_ct_kill_acct(ct, ctinfo, skb);
1034                         return NF_ACCEPT;
1035                 }
1036                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1037                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1038                  */
1039                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1040                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1041                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1042         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1043                    && (old_state == TCP_CONNTRACK_SYN_RECV
1044                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1045                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1046                 /* Set ASSURED if we see a valid ack in ESTABLISHED
1047                    after SYN_RECV or a valid answer for a picked up
1048                    connection. */
1049                 set_bit(IPS_ASSURED_BIT, &ct->status);
1050                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1051         }
1052         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1053
1054         return NF_ACCEPT;
1055 }
1056
1057 /* Called when a new connection for this protocol found. */
1058 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1059                     unsigned int dataoff, unsigned int *timeouts)
1060 {
1061         enum tcp_conntrack new_state;
1062         const struct tcphdr *th;
1063         struct tcphdr _tcph;
1064         struct net *net = nf_ct_net(ct);
1065         struct nf_tcp_net *tn = tcp_pernet(net);
1066         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1067         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1068
1069         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1070         BUG_ON(th == NULL);
1071
1072         /* Don't need lock here: this conntrack not in circulation yet */
1073         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1074
1075         /* Invalid: delete conntrack */
1076         if (new_state >= TCP_CONNTRACK_MAX) {
1077                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1078                 return false;
1079         }
1080
1081         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1082                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1083                 /* SYN packet */
1084                 ct->proto.tcp.seen[0].td_end =
1085                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1086                                              dataoff, th);
1087                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1088                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1089                         ct->proto.tcp.seen[0].td_maxwin = 1;
1090                 ct->proto.tcp.seen[0].td_maxend =
1091                         ct->proto.tcp.seen[0].td_end;
1092
1093                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1094         } else if (tn->tcp_loose == 0) {
1095                 /* Don't try to pick up connections. */
1096                 return false;
1097         } else {
1098                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1099                 /*
1100                  * We are in the middle of a connection,
1101                  * its history is lost for us.
1102                  * Let's try to use the data from the packet.
1103                  */
1104                 ct->proto.tcp.seen[0].td_end =
1105                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1106                                              dataoff, th);
1107                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1108                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1109                         ct->proto.tcp.seen[0].td_maxwin = 1;
1110                 ct->proto.tcp.seen[0].td_maxend =
1111                         ct->proto.tcp.seen[0].td_end +
1112                         ct->proto.tcp.seen[0].td_maxwin;
1113
1114                 /* We assume SACK and liberal window checking to handle
1115                  * window scaling */
1116                 ct->proto.tcp.seen[0].flags =
1117                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1118                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1119         }
1120
1121         /* tcp_packet will set them */
1122         ct->proto.tcp.last_index = TCP_NONE_SET;
1123
1124         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1125                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1126                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1127                  sender->td_scale,
1128                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1129                  receiver->td_scale);
1130         return true;
1131 }
1132
1133 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1134
1135 #include <linux/netfilter/nfnetlink.h>
1136 #include <linux/netfilter/nfnetlink_conntrack.h>
1137
1138 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1139                          struct nf_conn *ct)
1140 {
1141         struct nlattr *nest_parms;
1142         struct nf_ct_tcp_flags tmp = {};
1143
1144         spin_lock_bh(&ct->lock);
1145         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1146         if (!nest_parms)
1147                 goto nla_put_failure;
1148
1149         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1150             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1151                        ct->proto.tcp.seen[0].td_scale) ||
1152             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1153                        ct->proto.tcp.seen[1].td_scale))
1154                 goto nla_put_failure;
1155
1156         tmp.flags = ct->proto.tcp.seen[0].flags;
1157         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1158                     sizeof(struct nf_ct_tcp_flags), &tmp))
1159                 goto nla_put_failure;
1160
1161         tmp.flags = ct->proto.tcp.seen[1].flags;
1162         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1163                     sizeof(struct nf_ct_tcp_flags), &tmp))
1164                 goto nla_put_failure;
1165         spin_unlock_bh(&ct->lock);
1166
1167         nla_nest_end(skb, nest_parms);
1168
1169         return 0;
1170
1171 nla_put_failure:
1172         spin_unlock_bh(&ct->lock);
1173         return -1;
1174 }
1175
1176 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1177         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1178         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1179         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1180         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1181         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1182 };
1183
1184 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1185 {
1186         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1187         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1188         int err;
1189
1190         /* updates could not contain anything about the private
1191          * protocol info, in that case skip the parsing */
1192         if (!pattr)
1193                 return 0;
1194
1195         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1196         if (err < 0)
1197                 return err;
1198
1199         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1200             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1201                 return -EINVAL;
1202
1203         spin_lock_bh(&ct->lock);
1204         if (tb[CTA_PROTOINFO_TCP_STATE])
1205                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1206
1207         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1208                 struct nf_ct_tcp_flags *attr =
1209                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1210                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1211                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1212         }
1213
1214         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1215                 struct nf_ct_tcp_flags *attr =
1216                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1217                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1218                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1219         }
1220
1221         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1222             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1223             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1224             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1225                 ct->proto.tcp.seen[0].td_scale =
1226                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1227                 ct->proto.tcp.seen[1].td_scale =
1228                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1229         }
1230         spin_unlock_bh(&ct->lock);
1231
1232         return 0;
1233 }
1234
1235 static int tcp_nlattr_size(void)
1236 {
1237         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1238                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1239 }
1240
1241 static int tcp_nlattr_tuple_size(void)
1242 {
1243         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1244 }
1245 #endif
1246
1247 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1248
1249 #include <linux/netfilter/nfnetlink.h>
1250 #include <linux/netfilter/nfnetlink_cttimeout.h>
1251
1252 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1253                                      struct net *net, void *data)
1254 {
1255         unsigned int *timeouts = data;
1256         struct nf_tcp_net *tn = tcp_pernet(net);
1257         int i;
1258
1259         /* set default TCP timeouts. */
1260         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1261                 timeouts[i] = tn->timeouts[i];
1262
1263         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1264                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1265                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1266         }
1267         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1268                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1269                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1270         }
1271         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1272                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1273                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1274         }
1275         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1276                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1277                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1278         }
1279         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1280                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1281                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1282         }
1283         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1284                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1285                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1286         }
1287         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1288                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1289                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1290         }
1291         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1292                 timeouts[TCP_CONNTRACK_CLOSE] =
1293                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1294         }
1295         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1296                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1297                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1298         }
1299         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1300                 timeouts[TCP_CONNTRACK_RETRANS] =
1301                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1302         }
1303         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1304                 timeouts[TCP_CONNTRACK_UNACK] =
1305                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1306         }
1307         return 0;
1308 }
1309
1310 static int
1311 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1312 {
1313         const unsigned int *timeouts = data;
1314
1315         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1316                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1317             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1318                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1319             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1320                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1321             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1322                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1323             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1324                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1325             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1326                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1327             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1328                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1329             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1330                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1331             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1332                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1333             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1334                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1335             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1336                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1337                 goto nla_put_failure;
1338         return 0;
1339
1340 nla_put_failure:
1341         return -ENOSPC;
1342 }
1343
1344 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1345         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1346         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1347         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1348         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1349         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1350         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1351         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1352         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1353         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1354         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1355         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1356 };
1357 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1358
1359 #ifdef CONFIG_SYSCTL
     /* sysctl entries for the TCP conntrack tunables.
      *
      * None of the entries sets a .data pointer in this initializer.
      * NOTE(review): presumably .data is filled in per entry, by position,
      * when the table is registered - confirm before reordering, inserting
      * or removing entries.
      *
      * The first ten entries use proc_dointvec_jiffies, the remaining three
      * use proc_dointvec.  The table ends with an empty terminator entry.
      */
1360 static struct ctl_table tcp_sysctl_table[] = {
             /* --- entries handled by proc_dointvec_jiffies --- */
1361         {
1362                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1363                 .maxlen         = sizeof(unsigned int),
1364                 .mode           = 0644,
1365                 .proc_handler   = proc_dointvec_jiffies,
1366         },
1367         {
1368                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1369                 .maxlen         = sizeof(unsigned int),
1370                 .mode           = 0644,
1371                 .proc_handler   = proc_dointvec_jiffies,
1372         },
1373         {
1374                 .procname       = "nf_conntrack_tcp_timeout_established",
1375                 .maxlen         = sizeof(unsigned int),
1376                 .mode           = 0644,
1377                 .proc_handler   = proc_dointvec_jiffies,
1378         },
1379         {
1380                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1381                 .maxlen         = sizeof(unsigned int),
1382                 .mode           = 0644,
1383                 .proc_handler   = proc_dointvec_jiffies,
1384         },
1385         {
1386                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1387                 .maxlen         = sizeof(unsigned int),
1388                 .mode           = 0644,
1389                 .proc_handler   = proc_dointvec_jiffies,
1390         },
1391         {
1392                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1393                 .maxlen         = sizeof(unsigned int),
1394                 .mode           = 0644,
1395                 .proc_handler   = proc_dointvec_jiffies,
1396         },
1397         {
1398                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1399                 .maxlen         = sizeof(unsigned int),
1400                 .mode           = 0644,
1401                 .proc_handler   = proc_dointvec_jiffies,
1402         },
1403         {
1404                 .procname       = "nf_conntrack_tcp_timeout_close",
1405                 .maxlen         = sizeof(unsigned int),
1406                 .mode           = 0644,
1407                 .proc_handler   = proc_dointvec_jiffies,
1408         },
             /* Not to be confused with "nf_conntrack_tcp_max_retrans" near
              * the end of the table, which uses proc_dointvec. */
1409         {
1410                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1411                 .maxlen         = sizeof(unsigned int),
1412                 .mode           = 0644,
1413                 .proc_handler   = proc_dointvec_jiffies,
1414         },
1415         {
1416                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1417                 .maxlen         = sizeof(unsigned int),
1418                 .mode           = 0644,
1419                 .proc_handler   = proc_dointvec_jiffies,
1420         },
             /* --- entries handled by proc_dointvec --- */
1421         {
1422                 .procname       = "nf_conntrack_tcp_loose",
1423                 .maxlen         = sizeof(unsigned int),
1424                 .mode           = 0644,
1425                 .proc_handler   = proc_dointvec,
1426         },
1427         {
1428                 .procname       = "nf_conntrack_tcp_be_liberal",
1429                 .maxlen         = sizeof(unsigned int),
1430                 .mode           = 0644,
1431                 .proc_handler   = proc_dointvec,
1432         },
1433         {
1434                 .procname       = "nf_conntrack_tcp_max_retrans",
1435                 .maxlen         = sizeof(unsigned int),
1436                 .mode           = 0644,
1437                 .proc_handler   = proc_dointvec,
1438         },
             /* empty entry terminates the table */
1439         { }
1440 };
1441
1442 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1443 static struct ctl_table tcp_compat_sysctl_table[] = {
1444         {
1445                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1446                 .maxlen         = sizeof(unsigned int),
1447                 .mode           = 0644,
1448                 .proc_handler   = proc_dointvec_jiffies,
1449         },
1450         {
1451                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1452                 .maxlen         = sizeof(unsigned int),
1453                 .mode           = 0644,
1454                 .proc_handler   = proc_dointvec_jiffies,
1455         },
1456         {
1457                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1458                 .maxlen         = sizeof(unsigned int),
1459                 .mode           = 0644,
1460                 .proc_handler   = proc_dointvec_jiffies,
1461         },
1462         {
1463                 .procname       = "ip_conntrack_tcp_timeout_established",
1464                 .maxlen         = sizeof(unsigned int),
1465                 .mode           = 0644,
1466                 .proc_handler   = proc_dointvec_jiffies,
1467         },
1468         {
1469                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1470                 .maxlen         = sizeof(unsigned int),
1471                 .mode           = 0644,
1472                 .proc_handler   = proc_dointvec_jiffies,
1473         },
1474         {
1475                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1476                 .maxlen         = sizeof(unsigned int),
1477                 .mode           = 0644,
1478                 .proc_handler   = proc_dointvec_jiffies,
1479         },
1480         {
1481                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1482                 .maxlen         = sizeof(unsigned int),
1483                 .mode           = 0644,
1484                 .proc_handler   = proc_dointvec_jiffies,
1485         },
1486         {
1487                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1488                 .maxlen         = sizeof(unsigned int),
1489                 .mode           = 0644,
1490                 .proc_handler   = proc_dointvec_jiffies,
1491         },
1492         {
1493                 .procname       = "ip_conntrack_tcp_timeout_close",
1494                 .maxlen         = sizeof(unsigned int),
1495                 .mode           = 0644,
1496                 .proc_handler   = proc_dointvec_jiffies,
1497         },
1498         {
1499                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1500                 .maxlen         = sizeof(unsigned int),
1501                 .mode           = 0644,
1502                 .proc_handler   = proc_dointvec_jiffies,
1503         },
1504         {
1505                 .procname       = "ip_conntrack_tcp_loose",
1506                 .maxlen         = sizeof(unsigned int),
1507                 .mode           = 0644,
1508                 .proc_handler   = proc_dointvec,
1509         },
1510         {
1511                 .procname       = "ip_conntrack_tcp_be_liberal",
1512                 .maxlen         = sizeof(unsigned int),
1513                 .mode           = 0644,
1514                 .proc_handler   = proc_dointvec,
1515         },
1516         {
1517                 .procname       = "ip_conntrack_tcp_max_retrans",
1518                 .maxlen         = sizeof(unsigned int),
1519                 .mode           = 0644,
1520                 .proc_handler   = proc_dointvec,
1521         },
1522         { }
1523 };
1524 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1525 #endif /* CONFIG_SYSCTL */
1526
/*
 * Duplicate tcp_sysctl_table into pn->ctl_table and point every entry's
 * .data at the matching field of @tn.
 *
 * Returns 0 when the table is in place (freshly copied, or already present
 * on entry) and -ENOMEM when kmemdup() fails.  Without CONFIG_SYSCTL the
 * function does nothing and returns 0.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	/* Timeout backing each of the leading table slots, in slot order. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
		TCP_CONNTRACK_UNACK,
	};
	struct ctl_table *table;
	unsigned int i;

	/* A copy already exists; leave it alone. */
	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (!table)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(timeout_slot); i++)
		table[i].data = &tn->timeouts[timeout_slot[i]];

	/* The three plain integer tunables follow the timeout entries. */
	table[i].data = &tn->tcp_loose;
	table[i + 1].data = &tn->tcp_be_liberal;
	table[i + 2].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1556
/*
 * Duplicate tcp_compat_sysctl_table into pn->ctl_compat_table and point every
 * entry's .data at the matching field of @tn.
 *
 * Returns 0 on success and -ENOMEM when kmemdup() fails.  Unless both
 * CONFIG_SYSCTL and CONFIG_NF_CONNTRACK_PROC_COMPAT are set the function
 * does nothing and returns 0.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	/* Timeout backing each of the leading table slots, in slot order. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_SENT2,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
	};
	struct ctl_table *table;
	unsigned int i;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = table;
	if (!table)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(timeout_slot); i++)
		table[i].data = &tn->timeouts[timeout_slot[i]];

	/* The three plain integer tunables follow the timeout entries. */
	table[i].data = &tn->tcp_loose;
	table[i + 1].data = &tn->tcp_be_liberal;
	table[i + 2].data = &tn->tcp_max_retrans;
#endif
#endif
	return 0;
}
1585
1586 static int tcp_init_net(struct net *net, u_int16_t proto)
1587 {
1588         int ret;
1589         struct nf_tcp_net *tn = tcp_pernet(net);
1590         struct nf_proto_net *pn = &tn->pn;
1591
1592         if (!pn->users) {
1593                 int i;
1594
1595                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1596                         tn->timeouts[i] = tcp_timeouts[i];
1597
1598                 tn->tcp_loose = nf_ct_tcp_loose;
1599                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1600                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1601         }
1602
1603         if (proto == AF_INET) {
1604                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1605                 if (ret < 0)
1606                         return ret;
1607
1608                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1609                 if (ret < 0)
1610                         nf_ct_kfree_compat_sysctl_table(pn);
1611         } else
1612                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1613
1614         return ret;
1615 }
1616
1617 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1618 {
1619         return &net->ct.nf_ct_proto.tcp.pn;
1620 }
1621
1622 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1623 {
1624         .l3proto                = PF_INET,
1625         .l4proto                = IPPROTO_TCP,
1626         .name                   = "tcp",
1627         .pkt_to_tuple           = tcp_pkt_to_tuple,
1628         .invert_tuple           = tcp_invert_tuple,
1629         .print_tuple            = tcp_print_tuple,
1630         .print_conntrack        = tcp_print_conntrack,
1631         .packet                 = tcp_packet,
1632         .get_timeouts           = tcp_get_timeouts,
1633         .new                    = tcp_new,
1634         .error                  = tcp_error,
1635 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1636         .to_nlattr              = tcp_to_nlattr,
1637         .nlattr_size            = tcp_nlattr_size,
1638         .from_nlattr            = nlattr_to_tcp,
1639         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1640         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1641         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1642         .nla_policy             = nf_ct_port_nla_policy,
1643 #endif
1644 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1645         .ctnl_timeout           = {
1646                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1647                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1648                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1649                 .obj_size       = sizeof(unsigned int) *
1650                                         TCP_CONNTRACK_TIMEOUT_MAX,
1651                 .nla_policy     = tcp_timeout_nla_policy,
1652         },
1653 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1654         .init_net               = tcp_init_net,
1655         .get_net_proto          = tcp_get_net_proto,
1656 };
1657 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1658
1659 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1660 {
1661         .l3proto                = PF_INET6,
1662         .l4proto                = IPPROTO_TCP,
1663         .name                   = "tcp",
1664         .pkt_to_tuple           = tcp_pkt_to_tuple,
1665         .invert_tuple           = tcp_invert_tuple,
1666         .print_tuple            = tcp_print_tuple,
1667         .print_conntrack        = tcp_print_conntrack,
1668         .packet                 = tcp_packet,
1669         .get_timeouts           = tcp_get_timeouts,
1670         .new                    = tcp_new,
1671         .error                  = tcp_error,
1672 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1673         .to_nlattr              = tcp_to_nlattr,
1674         .nlattr_size            = tcp_nlattr_size,
1675         .from_nlattr            = nlattr_to_tcp,
1676         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1677         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1678         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1679         .nla_policy             = nf_ct_port_nla_policy,
1680 #endif
1681 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1682         .ctnl_timeout           = {
1683                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1684                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1685                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1686                 .obj_size       = sizeof(unsigned int) *
1687                                         TCP_CONNTRACK_TIMEOUT_MAX,
1688                 .nla_policy     = tcp_timeout_nla_policy,
1689         },
1690 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1691         .init_net               = tcp_init_net,
1692         .get_net_proto          = tcp_get_net_proto,
1693 };
1694 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);