Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck...
[pandora-kernel.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/module.h>
12 #include <linux/in.h>
13 #include <linux/tcp.h>
14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h>
18 #include <asm/unaligned.h>
19
20 #include <net/tcp.h>
21
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28 #include <net/netfilter/nf_log.h>
29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31
32 /* "Be conservative in what you do,
33     be liberal in what you accept from others."
34     If it's non-zero, we mark only out of window RST segments as INVALID. */
35 static int nf_ct_tcp_be_liberal __read_mostly = 0;
36
37 /* If it is set to zero, we disable picking up already established
38    connections. */
39 static int nf_ct_tcp_loose __read_mostly = 1;
40
41 /* Max number of the retransmitted packets without receiving an (acceptable)
42    ACK from the destination. If this number is reached, a shorter timer
43    will be started. */
44 static int nf_ct_tcp_max_retrans __read_mostly = 3;
45
46   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47      closely.  They're more complex. --RR */
48
/*
 * Printable names of the tracked TCP states.  tcp_print_conntrack() uses
 * the connection's state value as the index, so the position of each
 * entry is significant.
 */
static const char *const tcp_conntrack_names[] = {
        "NONE",         /* 0 */
        "SYN_SENT",     /* 1 */
        "SYN_RECV",     /* 2 */
        "ESTABLISHED",  /* 3 */
        "FIN_WAIT",     /* 4 */
        "CLOSE_WAIT",   /* 5 */
        "LAST_ACK",     /* 6 */
        "TIME_WAIT",    /* 7 */
        "CLOSE",        /* 8 */
        "SYN_SENT2",    /* 9 */
};
61
62 #define SECS * HZ
63 #define MINS * 60 SECS
64 #define HOURS * 60 MINS
65 #define DAYS * 24 HOURS
66
67 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
68         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
69         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
70         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
71         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
72         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
73         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
74         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
75         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
76         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
77 /* RFC1122 says the R2 limit should be at least 100 seconds.
78    Linux uses 15 packets as limit, which corresponds
79    to ~13-30min depending on RTO. */
80         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
81         [TCP_CONNTRACK_UNACK]           = 5 MINS,
82 };
83
/*
 * Three-character shorthands for the conntrack states; they keep the rows
 * of the tcp_conntracks transition table short enough to read.
 * sIV (TCP_CONNTRACK_MAX) is used for transitions regarded as invalid and
 * sIG (TCP_CONNTRACK_IGNORE) for packets that are to be ignored.
 */
84 #define sNO TCP_CONNTRACK_NONE
85 #define sSS TCP_CONNTRACK_SYN_SENT
86 #define sSR TCP_CONNTRACK_SYN_RECV
87 #define sES TCP_CONNTRACK_ESTABLISHED
88 #define sFW TCP_CONNTRACK_FIN_WAIT
89 #define sCW TCP_CONNTRACK_CLOSE_WAIT
90 #define sLA TCP_CONNTRACK_LAST_ACK
91 #define sTW TCP_CONNTRACK_TIME_WAIT
92 #define sCL TCP_CONNTRACK_CLOSE
93 #define sS2 TCP_CONNTRACK_SYN_SENT2
94 #define sIV TCP_CONNTRACK_MAX
95 #define sIG TCP_CONNTRACK_IGNORE
96
/*
 * Classification of a segment by its RST/SYN/FIN/ACK flags, as computed by
 * get_conntrack_index().  The numbering matches the row order of the
 * tcp_conntracks table (syn, synack, fin, ack, rst, none).
 */
enum tcp_bit_set {
        TCP_SYN_SET     = 0,    /* SYN set, ACK clear */
        TCP_SYNACK_SET  = 1,    /* SYN and ACK set */
        TCP_FIN_SET     = 2,    /* FIN set (no RST, no SYN) */
        TCP_ACK_SET     = 3,    /* ACK set (no RST, SYN or FIN) */
        TCP_RST_SET     = 4,    /* RST set */
        TCP_NONE_SET    = 5,    /* none of RST/SYN/FIN/ACK set */
};
106
107 /*
108  * The TCP state transition table needs a few words...
109  *
110  * We are the man in the middle. All the packets go through us
111  * but might get lost in transit to the destination.
112  * It is assumed that the destinations can't receive segments
113  * we haven't seen.
114  *
115  * The checked segment is in window, but our windows are *not*
116  * equivalent with the ones of the sender/receiver. We always
117  * try to guess the state of the current sender.
118  *
119  * The meanings of the states are:
120  *
121  * NONE:        initial state
122  * SYN_SENT:    SYN-only packet seen
123  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
124  * SYN_RECV:    SYN-ACK packet seen
125  * ESTABLISHED: ACK packet seen
126  * FIN_WAIT:    FIN packet seen
127  * CLOSE_WAIT:  ACK seen (after FIN)
128  * LAST_ACK:    FIN seen (after FIN)
129  * TIME_WAIT:   last ACK seen
130  * CLOSE:       closed connection (RST)
131  *
132  * Packets marked as IGNORED (sIG):
133  *      if they may be either invalid or valid
134  *      and the receiver may send back a connection
135  *      closing RST or a SYN/ACK.
136  *
137  * Packets marked as INVALID (sIV):
138  *      if we regard them as truly invalid packets
139  */
/*
 * Layout: first index is the packet direction (ORIGINAL, then REPLY),
 * second index is the segment class in enum tcp_bit_set order (syn, synack,
 * fin, ack, rst, none), third index is the current state in the column
 * order given by the header comments.  Each entry is the resulting state.
 */
140 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
141         {
142 /* ORIGINAL */
143 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
144 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
145 /*
146  *      sNO -> sSS      Initialize a new connection
147  *      sSS -> sSS      Retransmitted SYN
148  *      sS2 -> sS2      Late retransmitted SYN
149  *      sSR -> sIG
150  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
151  *                      are errors. Receiver will reply with RST
152  *                      and close the connection.
153  *                      Or we are not in sync and hold a dead connection.
154  *      sFW -> sIG
155  *      sCW -> sIG
156  *      sLA -> sIG
157  *      sTW -> sSS      Reopened connection (RFC 1122).
158  *      sCL -> sSS
159  */
160 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
161 /*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
162 /*
163  *      sNO -> sIV      Too late and no reason to do anything
164  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
165  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
166  *      sSR -> sIG
167  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
168  *                      are errors. Receiver will reply with RST
169  *                      and close the connection.
170  *                      Or we are not in sync and hold a dead connection.
171  *      sFW -> sIG
172  *      sCW -> sIG
173  *      sLA -> sIG
174  *      sTW -> sIG
175  *      sCL -> sIG
176  */
177 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
178 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
179 /*
180  *      sNO -> sIV      Too late and no reason to do anything...
181  *      sSS -> sIV      Client might not send FIN in this state:
182  *                      we enforce waiting for a SYN/ACK reply first.
183  *      sS2 -> sIV
184  *      sSR -> sFW      Close started.
185  *      sES -> sFW
186  *      sFW -> sLA      FIN seen in both directions, waiting for
187  *                      the last ACK.
188  *                      Might be a retransmitted FIN as well...
189  *      sCW -> sLA
190  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
191  *      sTW -> sTW
192  *      sCL -> sCL
193  */
194 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
195 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
196 /*
197  *      sNO -> sES      Assumed.
198  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
199  *      sS2 -> sIV
200  *      sSR -> sES      Established state is reached.
201  *      sES -> sES      :-)
202  *      sFW -> sCW      Normal close request answered by ACK.
203  *      sCW -> sCW
204  *      sLA -> sTW      Last ACK detected.
205  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
206  *      sCL -> sCL
207  */
208 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
209 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
210 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
211         },
212         {
213 /* REPLY */
214 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
215 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
216 /*
217  *      sNO -> sIV      Never reached.
218  *      sSS -> sS2      Simultaneous open
219  *      sS2 -> sS2      Retransmitted simultaneous SYN
220  *      sSR -> sIV      Invalid SYN packets sent by the server
221  *      sES -> sIV
222  *      sFW -> sIV
223  *      sCW -> sIV
224  *      sLA -> sIV
225  *      sTW -> sIV      Reopened connection, but server may not do it.
226  *      sCL -> sIV
227  */
228 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
229 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
230 /*
231  *      sSS -> sSR      Standard open.
232  *      sS2 -> sSR      Simultaneous open
233  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
234  *      sES -> sIG      Late retransmitted SYN/ACK?
235  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
236  *      sCW -> sIG
237  *      sLA -> sIG
238  *      sTW -> sIG
239  *      sCL -> sIG
240  */
241 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
242 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
243 /*
244  *      sSS -> sIV      Server might not send FIN in this state.
245  *      sS2 -> sIV
246  *      sSR -> sFW      Close started.
247  *      sES -> sFW
248  *      sFW -> sLA      FIN seen in both directions.
249  *      sCW -> sLA
250  *      sLA -> sLA      Retransmitted FIN.
251  *      sTW -> sTW
252  *      sCL -> sCL
253  */
254 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
255 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
256 /*
257  *      sSS -> sIG      Might be a half-open connection.
258  *      sS2 -> sIG
259  *      sSR -> sSR      Might answer late resent SYN.
260  *      sES -> sES      :-)
261  *      sFW -> sCW      Normal close request answered by ACK.
262  *      sCW -> sCW
263  *      sLA -> sTW      Last ACK detected.
264  *      sTW -> sTW      Retransmitted last ACK.
265  *      sCL -> sCL
266  */
267 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
268 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
269 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
270         }
271 };
272
273 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
274 {
275         return &net->ct.nf_ct_proto.tcp;
276 }
277
278 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
279                              struct nf_conntrack_tuple *tuple)
280 {
281         const struct tcphdr *hp;
282         struct tcphdr _hdr;
283
284         /* Actually only need first 8 bytes. */
285         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
286         if (hp == NULL)
287                 return false;
288
289         tuple->src.u.tcp.port = hp->source;
290         tuple->dst.u.tcp.port = hp->dest;
291
292         return true;
293 }
294
295 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
296                              const struct nf_conntrack_tuple *orig)
297 {
298         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
299         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
300         return true;
301 }
302
303 /* Print out the per-protocol part of the tuple. */
304 static int tcp_print_tuple(struct seq_file *s,
305                            const struct nf_conntrack_tuple *tuple)
306 {
307         return seq_printf(s, "sport=%hu dport=%hu ",
308                           ntohs(tuple->src.u.tcp.port),
309                           ntohs(tuple->dst.u.tcp.port));
310 }
311
312 /* Print out the private part of the conntrack. */
313 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
314 {
315         enum tcp_conntrack state;
316
317         spin_lock_bh(&ct->lock);
318         state = ct->proto.tcp.state;
319         spin_unlock_bh(&ct->lock);
320
321         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
322 }
323
324 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
325 {
326         if (tcph->rst) return TCP_RST_SET;
327         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
328         else if (tcph->fin) return TCP_FIN_SET;
329         else if (tcph->ack) return TCP_ACK_SET;
330         else return TCP_NONE_SET;
331 }
332
333 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
334    in IP Filter' by Guido van Rooij.
335
336    http://www.sane.nl/events/sane2000/papers.html
337    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
338
339    The boundaries and the conditions are changed according to RFC793:
340    the packet must intersect the window (i.e. segments may be
341    after the right or before the left edge) and thus receivers may ACK
342    segments after the right edge of the window.
343
344         td_maxend = max(sack + max(win,1)) seen in reply packets
345         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
346         td_maxwin += seq + len - sender.td_maxend
347                         if seq + len > sender.td_maxend
348         td_end    = max(seq + len) seen in sent packets
349
350    I.   Upper bound for valid data:     seq <= sender.td_maxend
351    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
352    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
353    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
354
355    where sack is the highest right edge of sack block found in the packet
356    or ack in the case of packet without SACK option.
357
358    The upper bound limit for a valid (s)ack is not ignored -
359    we don't have to deal with fragments.
360 */
361
362 static inline __u32 segment_seq_plus_len(__u32 seq,
363                                          size_t len,
364                                          unsigned int dataoff,
365                                          const struct tcphdr *tcph)
366 {
367         /* XXX Should I use payload length field in IP/IPv6 header ?
368          * - YK */
369         return (seq + len - dataoff - tcph->doff*4
370                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
371 }
372
/*
 * MAXACKWINDOW(sender) yields the larger of sender->td_maxwin and
 * MAXACKWINCONST.  tcp_in_window() subtracts it from receiver->td_end to
 * obtain the lower bound for a valid (s)ack.
 * Note that the macro evaluates its argument twice.
 */
373 /* Fixme: what about big packets? */
374 #define MAXACKWINCONST                  66000
375 #define MAXACKWINDOW(sender)                                            \
376         ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
377                                               : MAXACKWINCONST)
378
379 /*
380  * Simplified tcp_parse_options routine from tcp_input.c
381  */
382 static void tcp_options(const struct sk_buff *skb,
383                         unsigned int dataoff,
384                         const struct tcphdr *tcph,
385                         struct ip_ct_tcp_state *state)
386 {
387         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
388         const unsigned char *ptr;
389         int length = (tcph->doff*4) - sizeof(struct tcphdr);
390
391         if (!length)
392                 return;
393
394         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
395                                  length, buff);
396         BUG_ON(ptr == NULL);
397
398         state->td_scale =
399         state->flags = 0;
400
401         while (length > 0) {
402                 int opcode=*ptr++;
403                 int opsize;
404
405                 switch (opcode) {
406                 case TCPOPT_EOL:
407                         return;
408                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
409                         length--;
410                         continue;
411                 default:
412                         opsize=*ptr++;
413                         if (opsize < 2) /* "silly options" */
414                                 return;
415                         if (opsize > length)
416                                 return; /* don't parse partial options */
417
418                         if (opcode == TCPOPT_SACK_PERM
419                             && opsize == TCPOLEN_SACK_PERM)
420                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
421                         else if (opcode == TCPOPT_WINDOW
422                                  && opsize == TCPOLEN_WINDOW) {
423                                 state->td_scale = *(u_int8_t *)ptr;
424
425                                 if (state->td_scale > 14) {
426                                         /* See RFC1323 */
427                                         state->td_scale = 14;
428                                 }
429                                 state->flags |=
430                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
431                         }
432                         ptr += opsize - 2;
433                         length -= opsize;
434                 }
435         }
436 }
437
438 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
439                      const struct tcphdr *tcph, __u32 *sack)
440 {
441         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
442         const unsigned char *ptr;
443         int length = (tcph->doff*4) - sizeof(struct tcphdr);
444         __u32 tmp;
445
446         if (!length)
447                 return;
448
449         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
450                                  length, buff);
451         BUG_ON(ptr == NULL);
452
453         /* Fast path for timestamp-only option */
454         if (length == TCPOLEN_TSTAMP_ALIGNED
455             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
456                                        | (TCPOPT_NOP << 16)
457                                        | (TCPOPT_TIMESTAMP << 8)
458                                        | TCPOLEN_TIMESTAMP))
459                 return;
460
461         while (length > 0) {
462                 int opcode = *ptr++;
463                 int opsize, i;
464
465                 switch (opcode) {
466                 case TCPOPT_EOL:
467                         return;
468                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
469                         length--;
470                         continue;
471                 default:
472                         opsize = *ptr++;
473                         if (opsize < 2) /* "silly options" */
474                                 return;
475                         if (opsize > length)
476                                 return; /* don't parse partial options */
477
478                         if (opcode == TCPOPT_SACK
479                             && opsize >= (TCPOLEN_SACK_BASE
480                                           + TCPOLEN_SACK_PERBLOCK)
481                             && !((opsize - TCPOLEN_SACK_BASE)
482                                  % TCPOLEN_SACK_PERBLOCK)) {
483                                 for (i = 0;
484                                      i < (opsize - TCPOLEN_SACK_BASE);
485                                      i += TCPOLEN_SACK_PERBLOCK) {
486                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
487
488                                         if (after(tmp, *sack))
489                                                 *sack = tmp;
490                                 }
491                                 return;
492                         }
493                         ptr += opsize - 2;
494                         length -= opsize;
495                 }
496         }
497 }
498
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Query the nf_ct_nat_offset hook for the offset that applies to @seq in
 * direction @dir of @ct.  Returns 0 when no hook is installed.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
                             enum ip_conntrack_dir dir,
                             u32 seq)
{
        typeof(nf_ct_nat_offset) hook = rcu_dereference(nf_ct_nat_offset);

        if (hook == NULL)
                return 0;

        return hook(ct, dir, seq);
}
/* Only NFPROTO_IPV4 packets are looked up; every other family yields 0. */
#define NAT_OFFSET(pf, ct, dir, seq) \
        (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
#else
/* Without NAT support there is never an offset. */
#define NAT_OFFSET(pf, ct, dir, seq)    0
#endif
513
/*
 * Check whether a segment fits the tracked windows of its connection and,
 * if it does, update the window tracking state.
 *
 * @ct:      conntrack entry the packet belongs to
 * @state:   TCP tracking data; seen[dir] is used as the sender and
 *           seen[!dir] as the receiver
 * @dir:     direction of the packet
 * @index:   segment class; only TCP_ACK_SET segments take part in the
 *           retransmission counting
 * @skb:     the packet
 * @dataoff: offset of the TCP header within @skb
 * @tcph:    the TCP header
 * @pf:      protocol family, used for the NAT offset lookup and for logging
 *
 * Returns true when the segment passes the four window checks (I-IV in the
 * comment above segment_seq_plus_len()), or when it fails them but liberal
 * mode is enabled for the sender or for the namespace.  A SYN without ACK
 * that initializes the sender data (simultaneous open) is accepted right
 * away.  Otherwise false is returned, after logging which bound was
 * violated if invalid-packet logging is enabled.
 */
514 static bool tcp_in_window(const struct nf_conn *ct,
515                           struct ip_ct_tcp *state,
516                           enum ip_conntrack_dir dir,
517                           unsigned int index,
518                           const struct sk_buff *skb,
519                           unsigned int dataoff,
520                           const struct tcphdr *tcph,
521                           u_int8_t pf)
522 {
523         struct net *net = nf_ct_net(ct);
524         struct nf_tcp_net *tn = tcp_pernet(net);
525         struct ip_ct_tcp_state *sender = &state->seen[dir];
526         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
527         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
528         __u32 seq, ack, sack, end, win, swin;
529         s16 receiver_offset;
530         bool res;
531
532         /*
533          * Get the required data from the packet.
534          */
535         seq = ntohl(tcph->seq);
536         ack = sack = ntohl(tcph->ack_seq);
537         win = ntohs(tcph->window);
538         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
539
        /* Only look for SACK blocks if the receiver offered SACK-permitted. */
540         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
541                 tcp_sack(skb, dataoff, tcph, &sack);
542
543         /* Take into account NAT sequence number mangling */
544         receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
545         ack -= receiver_offset;
546         sack -= receiver_offset;
547
548         pr_debug("tcp_in_window: START\n");
549         pr_debug("tcp_in_window: ");
550         nf_ct_dump_tuple(tuple);
551         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
552                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
553         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
554                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
555                  sender->td_end, sender->td_maxend, sender->td_maxwin,
556                  sender->td_scale,
557                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
558                  receiver->td_scale);
559
        /* td_maxwin == 0 means no data has been recorded for this sender yet. */
560         if (sender->td_maxwin == 0) {
561                 /*
562                  * Initialize sender data.
563                  */
564                 if (tcph->syn) {
565                         /*
566                          * SYN-ACK in reply to a SYN
567                          * or SYN from reply direction in simultaneous open.
568                          */
569                         sender->td_end =
570                         sender->td_maxend = end;
571                         sender->td_maxwin = (win == 0 ? 1 : win);
572
573                         tcp_options(skb, dataoff, tcph, sender);
574                         /*
575                          * RFC 1323:
576                          * Both sides must send the Window Scale option
577                          * to enable window scaling in either direction.
578                          */
579                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
580                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
581                                 sender->td_scale =
582                                 receiver->td_scale = 0;
583                         if (!tcph->ack)
584                                 /* Simultaneous open */
585                                 return true;
586                 } else {
587                         /*
588                          * We are in the middle of a connection,
589                          * its history is lost for us.
590                          * Let's try to use the data from the packet.
591                          */
592                         sender->td_end = end;
593                         swin = win << sender->td_scale;
594                         sender->td_maxwin = (swin == 0 ? 1 : swin);
595                         sender->td_maxend = end + sender->td_maxwin;
596                         /*
597                          * We haven't seen traffic in the other direction yet
598                          * but we have to tweak window tracking to pass III
599                          * and IV until that happens.
600                          */
601                         if (receiver->td_maxwin == 0)
602                                 receiver->td_end = receiver->td_maxend = sack;
603                 }
604         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
605                      && dir == IP_CT_DIR_ORIGINAL)
606                    || (state->state == TCP_CONNTRACK_SYN_RECV
607                      && dir == IP_CT_DIR_REPLY))
608                    && after(end, sender->td_end)) {
609                 /*
610                  * RFC 793: "if a TCP is reinitialized ... then it need
611                  * not wait at all; it must only be sure to use sequence
612                  * numbers larger than those recently used."
613                  */
614                 sender->td_end =
615                 sender->td_maxend = end;
616                 sender->td_maxwin = (win == 0 ? 1 : win);
617
618                 tcp_options(skb, dataoff, tcph, sender);
619         }
620
621         if (!(tcph->ack)) {
622                 /*
623                  * If there is no ACK, just pretend it was set and OK.
624                  */
625                 ack = sack = receiver->td_end;
626         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
627                     (TCP_FLAG_ACK|TCP_FLAG_RST))
628                    && (ack == 0)) {
629                 /*
630                  * Broken TCP stacks, that set ACK in RST packets as well
631                  * with zero ack value.
632                  */
633                 ack = sack = receiver->td_end;
634         }
635
636         if (seq == end
637             && (!tcph->rst
638                 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
639                 /*
640                  * Packet contains no data: we assume it is valid
641                  * and check the ack value only.
642                  * However RST segments are always validated by their
643                  * SEQ number, except when seq == 0 (reset sent answering
644                  * SYN).
645                  */
646                 seq = end = sender->td_end;
647
648         pr_debug("tcp_in_window: ");
649         nf_ct_dump_tuple(tuple);
650         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
651                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
652         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
653                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
654                  sender->td_end, sender->td_maxend, sender->td_maxwin,
655                  sender->td_scale,
656                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
657                  receiver->td_scale);
658
659         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
660                  before(seq, sender->td_maxend + 1),
661                  after(end, sender->td_end - receiver->td_maxwin - 1),
662                  before(sack, receiver->td_end + 1),
663                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
664
        /*
         * Window checks I-IV.  The bounds are inclusive, hence the +1/-1
         * around the strict before()/after() comparisons.
         */
665         if (before(seq, sender->td_maxend + 1) &&
666             after(end, sender->td_end - receiver->td_maxwin - 1) &&
667             before(sack, receiver->td_end + 1) &&
668             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
669                 /*
670                  * Take into account window scaling (RFC 1323).
671                  */
672                 if (!tcph->syn)
673                         win <<= sender->td_scale;
674
675                 /*
676                  * Update sender data.
677                  */
678                 swin = win + (sack - ack);
679                 if (sender->td_maxwin < swin)
680                         sender->td_maxwin = swin;
681                 if (after(end, sender->td_end)) {
682                         sender->td_end = end;
683                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
684                 }
685                 if (tcph->ack) {
                        /* Track the highest ack value sent by this side. */
686                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
687                                 sender->td_maxack = ack;
688                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
689                         } else if (after(ack, sender->td_maxack))
690                                 sender->td_maxack = ack;
691                 }
692
693                 /*
694                  * Update receiver data.
695                  */
696                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
697                         receiver->td_maxwin += end - sender->td_maxend;
698                 if (after(sack + win, receiver->td_maxend - 1)) {
699                         receiver->td_maxend = sack + win;
700                         if (win == 0)
701                                 receiver->td_maxend++;
702                 }
703                 if (ack == receiver->td_end)
704                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
705
706                 /*
707                  * Check retransmissions.
708                  */
709                 if (index == TCP_ACK_SET) {
                        /*
                         * Same direction, seq, ack, end and window as the
                         * previously recorded ACK segment: count it as a
                         * retransmission; anything else restarts the count.
                         */
710                         if (state->last_dir == dir
711                             && state->last_seq == seq
712                             && state->last_ack == ack
713                             && state->last_end == end
714                             && state->last_win == win)
715                                 state->retrans++;
716                         else {
717                                 state->last_dir = dir;
718                                 state->last_seq = seq;
719                                 state->last_ack = ack;
720                                 state->last_end = end;
721                                 state->last_win = win;
722                                 state->retrans = 0;
723                         }
724                 }
725                 res = true;
726         } else {
727                 res = false;
                /* Liberal mode accepts the segment despite the failed check. */
728                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
729                     tn->tcp_be_liberal)
730                         res = true;
                /*
                 * The nested conditional re-evaluates checks I-IV in order
                 * and selects the message of the first one that fails;
                 * "BUG" would mean that none of them fails.
                 */
731                 if (!res && LOG_INVALID(net, IPPROTO_TCP))
732                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
733                         "nf_ct_tcp: %s ",
734                         before(seq, sender->td_maxend + 1) ?
735                         after(end, sender->td_end - receiver->td_maxwin - 1) ?
736                         before(sack, receiver->td_end + 1) ?
737                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
738                         : "ACK is under the lower bound (possible overly delayed ACK)"
739                         : "ACK is over the upper bound (ACKed data not seen yet)"
740                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
741                         : "SEQ is over the upper bound (over the window of the receiver)");
742         }
743
744         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
745                  "receiver end=%u maxend=%u maxwin=%u\n",
746                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
747                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
748
749         return res;
750 }
751
752 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
753 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
754                                  TCPHDR_URG) + 1] =
755 {
756         [TCPHDR_SYN]                            = 1,
757         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
758         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
759         [TCPHDR_RST]                            = 1,
760         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
761         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
762         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
763         [TCPHDR_ACK]                            = 1,
764         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
765 };
766
767 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
768 static int tcp_error(struct net *net, struct nf_conn *tmpl,
769                      struct sk_buff *skb,
770                      unsigned int dataoff,
771                      enum ip_conntrack_info *ctinfo,
772                      u_int8_t pf,
773                      unsigned int hooknum)
774 {
775         const struct tcphdr *th;
776         struct tcphdr _tcph;
777         unsigned int tcplen = skb->len - dataoff;
778         u_int8_t tcpflags;
779
780         /* Smaller that minimal TCP header? */
781         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
782         if (th == NULL) {
783                 if (LOG_INVALID(net, IPPROTO_TCP))
784                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
785                                 "nf_ct_tcp: short packet ");
786                 return -NF_ACCEPT;
787         }
788
789         /* Not whole TCP header or malformed packet */
790         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
791                 if (LOG_INVALID(net, IPPROTO_TCP))
792                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
793                                 "nf_ct_tcp: truncated/malformed packet ");
794                 return -NF_ACCEPT;
795         }
796
797         /* Checksum invalid? Ignore.
798          * We skip checking packets on the outgoing path
799          * because the checksum is assumed to be correct.
800          */
801         /* FIXME: Source route IP option packets --RR */
802         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
803             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
804                 if (LOG_INVALID(net, IPPROTO_TCP))
805                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
806                                   "nf_ct_tcp: bad TCP checksum ");
807                 return -NF_ACCEPT;
808         }
809
810         /* Check TCP flags. */
811         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
812         if (!tcp_valid_flags[tcpflags]) {
813                 if (LOG_INVALID(net, IPPROTO_TCP))
814                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
815                                   "nf_ct_tcp: invalid TCP flag combination ");
816                 return -NF_ACCEPT;
817         }
818
819         return NF_ACCEPT;
820 }
821
822 static unsigned int *tcp_get_timeouts(struct net *net)
823 {
824         return tcp_pernet(net)->timeouts;
825 }
826
/*
 * Run one TCP segment of an already tracked connection through the state
 * machine.
 *
 * The candidate state is looked up in tcp_conntracks[dir][index][old_state]
 * with ct->lock held.  A few transitions get special treatment in the
 * switch below; everything else must pass tcp_in_window() before the new
 * state and the matching timeout are committed.
 *
 * @timeouts is indexed by TCP conntrack state and additionally by
 * TCP_CONNTRACK_RETRANS and TCP_CONNTRACK_UNACK.
 *
 * Returns verdict for packet:
 *   NF_ACCEPT   the segment was accepted (or deliberately ignored),
 *   -NF_ACCEPT  the segment is invalid (i.e. -1),
 *   -NF_REPEAT  the old entry was killed to reopen the connection,
 *   NF_DROP     the old entry should have been killed but nf_ct_kill()
 *               did not succeed.
 */
static int tcp_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
                      enum ip_conntrack_info ctinfo,
                      u_int8_t pf,
                      unsigned int hooknum,
                      unsigned int *timeouts)
{
        struct net *net = nf_ct_net(ct);
        struct nf_tcp_net *tn = tcp_pernet(net);
        struct nf_conntrack_tuple *tuple;
        enum tcp_conntrack new_state, old_state;
        enum ip_conntrack_dir dir;
        const struct tcphdr *th;
        struct tcphdr _tcph;
        unsigned long timeout;
        unsigned int index;

        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        BUG_ON(th == NULL);

        /* ct->lock is held from here until just before each return below. */
        spin_lock_bh(&ct->lock);
        old_state = ct->proto.tcp.state;
        dir = CTINFO2DIR(ctinfo);
        index = get_conntrack_index(th);
        new_state = tcp_conntracks[dir][index][old_state];
        tuple = &ct->tuplehash[dir].tuple;

        switch (new_state) {
        case TCP_CONNTRACK_SYN_SENT:
                if (old_state < TCP_CONNTRACK_TIME_WAIT)
                        break;
                /* RFC 1122: "When a connection is closed actively,
                 * it MUST linger in TIME-WAIT state for a time 2xMSL
                 * (Maximum Segment Lifetime). However, it MAY accept
                 * a new SYN from the remote TCP to reopen the connection
                 * directly from TIME-WAIT state, if..."
                 * We ignore the conditions because we are in the
                 * TIME-WAIT state anyway.
                 *
                 * Handle aborted connections: we and the server
                 * think there is an existing connection but the client
                 * aborts it and starts a new one.
                 */
                if (((ct->proto.tcp.seen[dir].flags
                      | ct->proto.tcp.seen[!dir].flags)
                     & IP_CT_TCP_FLAG_CLOSE_INIT)
                    || (ct->proto.tcp.last_dir == dir
                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
                        /* Attempt to reopen a closed/aborted connection.
                         * Delete this connection and look up again. */
                        spin_unlock_bh(&ct->lock);

                        /* Only repeat if we can actually remove the timer.
                         * Destruction may already be in progress in process
                         * context and we must give it a chance to terminate.
                         */
                        if (nf_ct_kill(ct))
                                return -NF_REPEAT;
                        return NF_DROP;
                }
                /* Fall through */
        case TCP_CONNTRACK_IGNORE:
                /* Ignored packets:
                 *
                 * Our connection entry may be out of sync, so ignore
                 * packets which may signal the real connection between
                 * the client and the server.
                 *
                 * a) SYN in ORIGINAL
                 * b) SYN/ACK in REPLY
                 * c) ACK in reply direction after initial SYN in original.
                 *
                 * If the ignored packet is invalid, the receiver will send
                 * a RST we'll catch below.
                 */
                if (index == TCP_SYNACK_SET
                    && ct->proto.tcp.last_index == TCP_SYN_SET
                    && ct->proto.tcp.last_dir != dir
                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
                        /* b) This SYN/ACK acknowledges a SYN that we earlier
                         * ignored as invalid. This means that the client and
                         * the server are both in sync, while the firewall is
                         * not. We get in sync from the previously annotated
                         * values.
                         */
                        old_state = TCP_CONNTRACK_SYN_SENT;
                        new_state = TCP_CONNTRACK_SYN_RECV;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
                                ct->proto.tcp.last_end;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
                                ct->proto.tcp.last_end;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
                                ct->proto.tcp.last_win == 0 ?
                                        1 : ct->proto.tcp.last_win;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
                                ct->proto.tcp.last_wscale;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
                                ct->proto.tcp.last_flags;
                        /* Start from scratch for the side that sent this
                         * SYN/ACK. */
                        memset(&ct->proto.tcp.seen[dir], 0,
                               sizeof(struct ip_ct_tcp_state));
                        break;
                }
                /* Remember this packet so that a later SYN/ACK (see b) above)
                 * or RST (see TCP_CONNTRACK_CLOSE below) can be matched
                 * against it. */
                ct->proto.tcp.last_index = index;
                ct->proto.tcp.last_dir = dir;
                ct->proto.tcp.last_seq = ntohl(th->seq);
                ct->proto.tcp.last_end =
                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
                ct->proto.tcp.last_win = ntohs(th->window);

                /* a) This is a SYN in ORIGINAL. The client and the server
                 * may be in sync but we are not. In that case, we annotate
                 * the TCP options and let the packet go through. If it is a
                 * valid SYN packet, the server will reply with a SYN/ACK, and
                 * then we'll get in sync. Otherwise, the server ignores it. */
                if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
                        struct ip_ct_tcp_state seen = {};

                        ct->proto.tcp.last_flags =
                        ct->proto.tcp.last_wscale = 0;
                        tcp_options(skb, dataoff, th, &seen);
                        if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
                                ct->proto.tcp.last_flags |=
                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
                                ct->proto.tcp.last_wscale = seen.td_scale;
                        }
                        if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
                                ct->proto.tcp.last_flags |=
                                        IP_CT_TCP_FLAG_SACK_PERM;
                        }
                }
                spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid packet ignored in "
                                  "state %s ", tcp_conntrack_names[old_state]);
                /* The packet is let through, the tracked state is untouched. */
                return NF_ACCEPT;
        case TCP_CONNTRACK_MAX:
                /* Invalid packet */
                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
                         dir, get_conntrack_index(th), old_state);
                spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid state ");
                return -NF_ACCEPT;
        case TCP_CONNTRACK_CLOSE:
                /* A RST whose sequence number lies before the highest ACK
                 * recorded for the other direction is rejected. */
                if (index == TCP_RST_SET
                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
                    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
                        /* Invalid RST  */
                        spin_unlock_bh(&ct->lock);
                        if (LOG_INVALID(net, IPPROTO_TCP))
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: invalid RST ");
                        return -NF_ACCEPT;
                }
                if (index == TCP_RST_SET
                    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
                         && ct->proto.tcp.last_index == TCP_SYN_SET)
                        || (!test_bit(IPS_ASSURED_BIT, &ct->status)
                            && ct->proto.tcp.last_index == TCP_ACK_SET))
                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
                        /* RST sent to invalid SYN or ACK we had let through
                         * at a) and c) above:
                         *
                         * a) SYN was in window then
                         * c) we hold a half-open connection.
                         *
                         * Delete our connection entry.
                         * We skip window checking, because packet might ACK
                         * segments we ignored. */
                        goto in_window;
                }
                /* Just fall through */
        default:
                /* Keep compilers happy. */
                break;
        }

        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
                           skb, dataoff, th, pf)) {
                spin_unlock_bh(&ct->lock);
                return -NF_ACCEPT;
        }
     in_window:
        /* From now on we have got in-window packets */
        ct->proto.tcp.last_index = index;
        ct->proto.tcp.last_dir = dir;

        pr_debug("tcp_conntracks: ");
        nf_ct_dump_tuple(tuple);
        pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
                 old_state, new_state);

        ct->proto.tcp.state = new_state;
        /* Record which side moved the connection into FIN_WAIT; the
         * SYN_SENT case above tests this flag on a later reopen attempt. */
        if (old_state != new_state
            && new_state == TCP_CONNTRACK_FIN_WAIT)
                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;

        /* Pick the timeout: the RETRANS or UNACK value replaces the one of
         * the new state only when it is the smaller of the two. */
        if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
            timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
                timeout = timeouts[TCP_CONNTRACK_RETRANS];
        else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
                 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
                timeout = timeouts[TCP_CONNTRACK_UNACK];
        else
                timeout = timeouts[new_state];
        spin_unlock_bh(&ct->lock);

        if (new_state != old_state)
                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);

        if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                /* If only reply is a RST, we can consider ourselves not to
                   have an established connection: this is a fairly common
                   problem case, so we can delete the conntrack
                   immediately.  --RR */
                if (th->rst) {
                        nf_ct_kill_acct(ct, ctinfo, skb);
                        return NF_ACCEPT;
                }
        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
                   && (old_state == TCP_CONNTRACK_SYN_RECV
                       || old_state == TCP_CONNTRACK_ESTABLISHED)
                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
                /* Set ASSURED if we see a valid ack in ESTABLISHED
                   after SYN_RECV or a valid answer for a picked up
                   connection. */
                set_bit(IPS_ASSURED_BIT, &ct->status);
                nf_conntrack_event_cache(IPCT_ASSURED, ct);
        }
        nf_ct_refresh_acct(ct, ctinfo, skb, timeout);

        return NF_ACCEPT;
}
1067
1068 /* Called when a new connection for this protocol found. */
1069 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1070                     unsigned int dataoff, unsigned int *timeouts)
1071 {
1072         enum tcp_conntrack new_state;
1073         const struct tcphdr *th;
1074         struct tcphdr _tcph;
1075         struct net *net = nf_ct_net(ct);
1076         struct nf_tcp_net *tn = tcp_pernet(net);
1077         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1078         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1079
1080         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1081         BUG_ON(th == NULL);
1082
1083         /* Don't need lock here: this conntrack not in circulation yet */
1084         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1085
1086         /* Invalid: delete conntrack */
1087         if (new_state >= TCP_CONNTRACK_MAX) {
1088                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1089                 return false;
1090         }
1091
1092         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1093                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1094                 /* SYN packet */
1095                 ct->proto.tcp.seen[0].td_end =
1096                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1097                                              dataoff, th);
1098                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1099                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1100                         ct->proto.tcp.seen[0].td_maxwin = 1;
1101                 ct->proto.tcp.seen[0].td_maxend =
1102                         ct->proto.tcp.seen[0].td_end;
1103
1104                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1105         } else if (tn->tcp_loose == 0) {
1106                 /* Don't try to pick up connections. */
1107                 return false;
1108         } else {
1109                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1110                 /*
1111                  * We are in the middle of a connection,
1112                  * its history is lost for us.
1113                  * Let's try to use the data from the packet.
1114                  */
1115                 ct->proto.tcp.seen[0].td_end =
1116                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1117                                              dataoff, th);
1118                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1119                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1120                         ct->proto.tcp.seen[0].td_maxwin = 1;
1121                 ct->proto.tcp.seen[0].td_maxend =
1122                         ct->proto.tcp.seen[0].td_end +
1123                         ct->proto.tcp.seen[0].td_maxwin;
1124
1125                 /* We assume SACK and liberal window checking to handle
1126                  * window scaling */
1127                 ct->proto.tcp.seen[0].flags =
1128                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1129                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1130         }
1131
1132         /* tcp_packet will set them */
1133         ct->proto.tcp.last_index = TCP_NONE_SET;
1134
1135         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1136                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1137                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1138                  sender->td_scale,
1139                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1140                  receiver->td_scale);
1141         return true;
1142 }
1143
1144 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1145
1146 #include <linux/netfilter/nfnetlink.h>
1147 #include <linux/netfilter/nfnetlink_conntrack.h>
1148
1149 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1150                          struct nf_conn *ct)
1151 {
1152         struct nlattr *nest_parms;
1153         struct nf_ct_tcp_flags tmp = {};
1154
1155         spin_lock_bh(&ct->lock);
1156         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1157         if (!nest_parms)
1158                 goto nla_put_failure;
1159
1160         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1161             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1162                        ct->proto.tcp.seen[0].td_scale) ||
1163             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1164                        ct->proto.tcp.seen[1].td_scale))
1165                 goto nla_put_failure;
1166
1167         tmp.flags = ct->proto.tcp.seen[0].flags;
1168         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1169                     sizeof(struct nf_ct_tcp_flags), &tmp))
1170                 goto nla_put_failure;
1171
1172         tmp.flags = ct->proto.tcp.seen[1].flags;
1173         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1174                     sizeof(struct nf_ct_tcp_flags), &tmp))
1175                 goto nla_put_failure;
1176         spin_unlock_bh(&ct->lock);
1177
1178         nla_nest_end(skb, nest_parms);
1179
1180         return 0;
1181
1182 nla_put_failure:
1183         spin_unlock_bh(&ct->lock);
1184         return -1;
1185 }
1186
1187 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1188         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1189         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1190         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1191         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1192         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1193 };
1194
1195 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1196 {
1197         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1198         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1199         int err;
1200
1201         /* updates could not contain anything about the private
1202          * protocol info, in that case skip the parsing */
1203         if (!pattr)
1204                 return 0;
1205
1206         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1207         if (err < 0)
1208                 return err;
1209
1210         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1211             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1212                 return -EINVAL;
1213
1214         spin_lock_bh(&ct->lock);
1215         if (tb[CTA_PROTOINFO_TCP_STATE])
1216                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1217
1218         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1219                 struct nf_ct_tcp_flags *attr =
1220                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1221                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1222                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1223         }
1224
1225         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1226                 struct nf_ct_tcp_flags *attr =
1227                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1228                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1229                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1230         }
1231
1232         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1233             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1234             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1235             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1236                 ct->proto.tcp.seen[0].td_scale =
1237                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1238                 ct->proto.tcp.seen[1].td_scale =
1239                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1240         }
1241         spin_unlock_bh(&ct->lock);
1242
1243         return 0;
1244 }
1245
1246 static int tcp_nlattr_size(void)
1247 {
1248         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1249                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1250 }
1251
1252 static int tcp_nlattr_tuple_size(void)
1253 {
1254         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1255 }
1256 #endif
1257
1258 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1259
1260 #include <linux/netfilter/nfnetlink.h>
1261 #include <linux/netfilter/nfnetlink_cttimeout.h>
1262
1263 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1264                                      struct net *net, void *data)
1265 {
1266         unsigned int *timeouts = data;
1267         struct nf_tcp_net *tn = tcp_pernet(net);
1268         int i;
1269
1270         /* set default TCP timeouts. */
1271         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1272                 timeouts[i] = tn->timeouts[i];
1273
1274         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1275                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1276                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1277         }
1278         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1279                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1280                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1281         }
1282         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1283                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1284                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1285         }
1286         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1287                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1288                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1289         }
1290         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1291                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1292                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1293         }
1294         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1295                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1296                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1297         }
1298         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1299                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1300                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1301         }
1302         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1303                 timeouts[TCP_CONNTRACK_CLOSE] =
1304                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1305         }
1306         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1307                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1308                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1309         }
1310         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1311                 timeouts[TCP_CONNTRACK_RETRANS] =
1312                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1313         }
1314         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1315                 timeouts[TCP_CONNTRACK_UNACK] =
1316                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1317         }
1318         return 0;
1319 }
1320
1321 static int
1322 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1323 {
1324         const unsigned int *timeouts = data;
1325
1326         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1327                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1328             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1329                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1330             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1331                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1332             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1333                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1334             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1335                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1336             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1337                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1338             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1339                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1340             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1341                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1342             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1343                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1344             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1345                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1346             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1347                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1348                 goto nla_put_failure;
1349         return 0;
1350
1351 nla_put_failure:
1352         return -ENOSPC;
1353 }
1354
1355 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1356         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1357         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1358         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1359         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1360         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1361         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1362         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1363         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1364         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1365 };
1366 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1367
1368 #ifdef CONFIG_SYSCTL
1369 static struct ctl_table tcp_sysctl_table[] = {
1370         {
1371                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1372                 .maxlen         = sizeof(unsigned int),
1373                 .mode           = 0644,
1374                 .proc_handler   = proc_dointvec_jiffies,
1375         },
1376         {
1377                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1378                 .maxlen         = sizeof(unsigned int),
1379                 .mode           = 0644,
1380                 .proc_handler   = proc_dointvec_jiffies,
1381         },
1382         {
1383                 .procname       = "nf_conntrack_tcp_timeout_established",
1384                 .maxlen         = sizeof(unsigned int),
1385                 .mode           = 0644,
1386                 .proc_handler   = proc_dointvec_jiffies,
1387         },
1388         {
1389                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1390                 .maxlen         = sizeof(unsigned int),
1391                 .mode           = 0644,
1392                 .proc_handler   = proc_dointvec_jiffies,
1393         },
1394         {
1395                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1396                 .maxlen         = sizeof(unsigned int),
1397                 .mode           = 0644,
1398                 .proc_handler   = proc_dointvec_jiffies,
1399         },
1400         {
1401                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1402                 .maxlen         = sizeof(unsigned int),
1403                 .mode           = 0644,
1404                 .proc_handler   = proc_dointvec_jiffies,
1405         },
1406         {
1407                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1408                 .maxlen         = sizeof(unsigned int),
1409                 .mode           = 0644,
1410                 .proc_handler   = proc_dointvec_jiffies,
1411         },
1412         {
1413                 .procname       = "nf_conntrack_tcp_timeout_close",
1414                 .maxlen         = sizeof(unsigned int),
1415                 .mode           = 0644,
1416                 .proc_handler   = proc_dointvec_jiffies,
1417         },
1418         {
1419                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1420                 .maxlen         = sizeof(unsigned int),
1421                 .mode           = 0644,
1422                 .proc_handler   = proc_dointvec_jiffies,
1423         },
1424         {
1425                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1426                 .maxlen         = sizeof(unsigned int),
1427                 .mode           = 0644,
1428                 .proc_handler   = proc_dointvec_jiffies,
1429         },
1430         {
1431                 .procname       = "nf_conntrack_tcp_loose",
1432                 .maxlen         = sizeof(unsigned int),
1433                 .mode           = 0644,
1434                 .proc_handler   = proc_dointvec,
1435         },
1436         {
1437                 .procname       = "nf_conntrack_tcp_be_liberal",
1438                 .maxlen         = sizeof(unsigned int),
1439                 .mode           = 0644,
1440                 .proc_handler   = proc_dointvec,
1441         },
1442         {
1443                 .procname       = "nf_conntrack_tcp_max_retrans",
1444                 .maxlen         = sizeof(unsigned int),
1445                 .mode           = 0644,
1446                 .proc_handler   = proc_dointvec,
1447         },
1448         { }
1449 };
1450
1451 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1452 static struct ctl_table tcp_compat_sysctl_table[] = {
1453         {
1454                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1455                 .maxlen         = sizeof(unsigned int),
1456                 .mode           = 0644,
1457                 .proc_handler   = proc_dointvec_jiffies,
1458         },
1459         {
1460                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1461                 .maxlen         = sizeof(unsigned int),
1462                 .mode           = 0644,
1463                 .proc_handler   = proc_dointvec_jiffies,
1464         },
1465         {
1466                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1467                 .maxlen         = sizeof(unsigned int),
1468                 .mode           = 0644,
1469                 .proc_handler   = proc_dointvec_jiffies,
1470         },
1471         {
1472                 .procname       = "ip_conntrack_tcp_timeout_established",
1473                 .maxlen         = sizeof(unsigned int),
1474                 .mode           = 0644,
1475                 .proc_handler   = proc_dointvec_jiffies,
1476         },
1477         {
1478                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1479                 .maxlen         = sizeof(unsigned int),
1480                 .mode           = 0644,
1481                 .proc_handler   = proc_dointvec_jiffies,
1482         },
1483         {
1484                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1485                 .maxlen         = sizeof(unsigned int),
1486                 .mode           = 0644,
1487                 .proc_handler   = proc_dointvec_jiffies,
1488         },
1489         {
1490                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1491                 .maxlen         = sizeof(unsigned int),
1492                 .mode           = 0644,
1493                 .proc_handler   = proc_dointvec_jiffies,
1494         },
1495         {
1496                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1497                 .maxlen         = sizeof(unsigned int),
1498                 .mode           = 0644,
1499                 .proc_handler   = proc_dointvec_jiffies,
1500         },
1501         {
1502                 .procname       = "ip_conntrack_tcp_timeout_close",
1503                 .maxlen         = sizeof(unsigned int),
1504                 .mode           = 0644,
1505                 .proc_handler   = proc_dointvec_jiffies,
1506         },
1507         {
1508                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1509                 .maxlen         = sizeof(unsigned int),
1510                 .mode           = 0644,
1511                 .proc_handler   = proc_dointvec_jiffies,
1512         },
1513         {
1514                 .procname       = "ip_conntrack_tcp_loose",
1515                 .maxlen         = sizeof(unsigned int),
1516                 .mode           = 0644,
1517                 .proc_handler   = proc_dointvec,
1518         },
1519         {
1520                 .procname       = "ip_conntrack_tcp_be_liberal",
1521                 .maxlen         = sizeof(unsigned int),
1522                 .mode           = 0644,
1523                 .proc_handler   = proc_dointvec,
1524         },
1525         {
1526                 .procname       = "ip_conntrack_tcp_max_retrans",
1527                 .maxlen         = sizeof(unsigned int),
1528                 .mode           = 0644,
1529                 .proc_handler   = proc_dointvec,
1530         },
1531         { }
1532 };
1533 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1534 #endif /* CONFIG_SYSCTL */
1535
/*
 * Give @pn its own copy of tcp_sysctl_table and point each entry's .data
 * at the matching field of @tn.
 *
 * Does nothing when @pn already owns a table, or when CONFIG_SYSCTL is
 * not set.  Returns 0 on success and -ENOMEM if the copy cannot be
 * allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	/* A table is already attached to this nf_proto_net; keep it. */
	if (pn->ctl_table != NULL)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Indices follow the entry order of tcp_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;

	pn->ctl_table = table;
#endif
	return 0;
}
1565
/*
 * Give @pn its own copy of tcp_compat_sysctl_table and point each entry's
 * .data at the matching field of @tn.
 *
 * Compiled to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are set.  Returns 0 on success and
 * -ENOMEM if the copy cannot be allocated; in the latter case
 * pn->ctl_compat_table is left NULL.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table *table;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	/* Stored unconditionally, so a failed copy leaves the field NULL. */
	pn->ctl_compat_table = table;
	if (table == NULL)
		return -ENOMEM;

	/* Indices follow the entry order of tcp_compat_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1594
1595 static int tcp_init_net(struct net *net, u_int16_t proto)
1596 {
1597         int ret;
1598         struct nf_tcp_net *tn = tcp_pernet(net);
1599         struct nf_proto_net *pn = &tn->pn;
1600
1601         if (!pn->users) {
1602                 int i;
1603
1604                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1605                         tn->timeouts[i] = tcp_timeouts[i];
1606
1607                 tn->tcp_loose = nf_ct_tcp_loose;
1608                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1609                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1610         }
1611
1612         if (proto == AF_INET) {
1613                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1614                 if (ret < 0)
1615                         return ret;
1616
1617                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1618                 if (ret < 0)
1619                         nf_ct_kfree_compat_sysctl_table(pn);
1620         } else
1621                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1622
1623         return ret;
1624 }
1625
1626 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1627 {
1628         return &net->ct.nf_ct_proto.tcp.pn;
1629 }
1630
1631 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1632 {
1633         .l3proto                = PF_INET,
1634         .l4proto                = IPPROTO_TCP,
1635         .name                   = "tcp",
1636         .pkt_to_tuple           = tcp_pkt_to_tuple,
1637         .invert_tuple           = tcp_invert_tuple,
1638         .print_tuple            = tcp_print_tuple,
1639         .print_conntrack        = tcp_print_conntrack,
1640         .packet                 = tcp_packet,
1641         .get_timeouts           = tcp_get_timeouts,
1642         .new                    = tcp_new,
1643         .error                  = tcp_error,
1644 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1645         .to_nlattr              = tcp_to_nlattr,
1646         .nlattr_size            = tcp_nlattr_size,
1647         .from_nlattr            = nlattr_to_tcp,
1648         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1649         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1650         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1651         .nla_policy             = nf_ct_port_nla_policy,
1652 #endif
1653 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1654         .ctnl_timeout           = {
1655                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1656                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1657                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1658                 .obj_size       = sizeof(unsigned int) *
1659                                         TCP_CONNTRACK_TIMEOUT_MAX,
1660                 .nla_policy     = tcp_timeout_nla_policy,
1661         },
1662 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1663         .init_net               = tcp_init_net,
1664         .get_net_proto          = tcp_get_net_proto,
1665 };
1666 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1667
1668 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1669 {
1670         .l3proto                = PF_INET6,
1671         .l4proto                = IPPROTO_TCP,
1672         .name                   = "tcp",
1673         .pkt_to_tuple           = tcp_pkt_to_tuple,
1674         .invert_tuple           = tcp_invert_tuple,
1675         .print_tuple            = tcp_print_tuple,
1676         .print_conntrack        = tcp_print_conntrack,
1677         .packet                 = tcp_packet,
1678         .get_timeouts           = tcp_get_timeouts,
1679         .new                    = tcp_new,
1680         .error                  = tcp_error,
1681 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1682         .to_nlattr              = tcp_to_nlattr,
1683         .nlattr_size            = tcp_nlattr_size,
1684         .from_nlattr            = nlattr_to_tcp,
1685         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1686         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1687         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1688         .nla_policy             = nf_ct_port_nla_policy,
1689 #endif
1690 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1691         .ctnl_timeout           = {
1692                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1693                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1694                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1695                 .obj_size       = sizeof(unsigned int) *
1696                                         TCP_CONNTRACK_TIMEOUT_MAX,
1697                 .nla_policy     = tcp_timeout_nla_policy,
1698         },
1699 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1700         .init_net               = tcp_init_net,
1701         .get_net_proto          = tcp_get_net_proto,
1702 };
1703 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);