Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
[pandora-kernel.git] / net / ipv4 / netfilter / ip_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
9  *      - Real stateful connection tracking
10  *      - Modified state transitions table
11  *      - Window scaling support added
12  *      - SACK support added
13  *
14  * Willy Tarreau:
15  *      - State table bugfixes
16  *      - More robust state changes
17  *      - Tuning timer parameters
18  *
19  * version 2.2
20  */
21
22 #include <linux/types.h>
23 #include <linux/timer.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/in.h>
27 #include <linux/ip.h>
28 #include <linux/tcp.h>
29 #include <linux/spinlock.h>
30
31 #include <net/tcp.h>
32
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/netfilter_ipv4/ip_conntrack.h>
35 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
36
/*
 * Debug tracing switch.  With "#if 0" (the default) DEBUGP() expands to
 * nothing, so its arguments are never evaluated.  Changing it to "#if 1"
 * maps DEBUGP to printk and defines DEBUGP_VARS, which this file tests
 * with #ifdef to declare locals that only the trace output needs.
 */
37 #if 0
38 #define DEBUGP printk
39 #define DEBUGP_VARS
40 #else
41 #define DEBUGP(format, args...)
42 #endif
43
44 /* Protects conntrack->proto.tcp */
/* Taken with the _bh read/write lock variants around every access to
   that state in this file. */
45 static DEFINE_RWLOCK(tcp_lock);
46
47 /* "Be conservative in what you do,
48     be liberal in what you accept from others."
49     If it's non-zero, we mark only out of window RST segments as INVALID. */
/* tcp_in_window() consults this flag: when it is non-zero, a segment that
   fails the window checks is still reported as in-window. */
50 int ip_ct_tcp_be_liberal __read_mostly = 0;
51
52 /* If it is set to zero, we disable picking up already established
53    connections. */
/* Defaults to 1 (pick-up enabled). */
54 int ip_ct_tcp_loose __read_mostly = 1;
55
56 /* Max number of the retransmitted packets without receiving an (acceptable)
57    ACK from the destination. If this number is reached, a shorter timer
58    will be started. */
/* Defaults to 3. */
59 int ip_ct_tcp_max_retrans __read_mostly = 3;
60
61   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
62      closely.  They're more complex. --RR */
63
64 static const char *tcp_conntrack_names[] = {
65         "NONE",
66         "SYN_SENT",
67         "SYN_RECV",
68         "ESTABLISHED",
69         "FIN_WAIT",
70         "CLOSE_WAIT",
71         "LAST_ACK",
72         "TIME_WAIT",
73         "CLOSE",
74         "LISTEN"
75 };
76
/*
 * Postfix time-unit helpers: written after a number ("2 MINS") each one
 * expands to a chain of multiplications that ends in "* HZ".  They are
 * deliberately unparenthesized and only make sense directly after a
 * numeric literal.
 */
77 #define SECS * HZ
78 #define MINS * 60 SECS
79 #define HOURS * 60 MINS
80 #define DAYS * 24 HOURS
81
/* Per-state timeouts; tcp_timeouts[] below points at these variables. */
82 unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
83 unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
84 unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
85 unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
86 unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
87 unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
88 unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
89 unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;
90
91 /* RFC1122 says the R2 limit should be at least 100 seconds.
92    Linux uses 15 packets as limit, which corresponds
93    to ~13-30min depending on RTO. */
94 unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
95
96 static const unsigned int * tcp_timeouts[]
97 = { NULL,                              /*      TCP_CONNTRACK_NONE */
98     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
99     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
100     &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
101     &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
102     &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT,       */
103     &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
104     &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
105     &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
106     NULL,                              /*      TCP_CONNTRACK_LISTEN */
107  };
108
/*
 * Three-letter shorthands for the conntrack states, used to keep the
 * tcp_conntracks[] transition table below readable.
 *   sIV ("invalid") aliases TCP_CONNTRACK_MAX,
 *   sIG ("ignore")  aliases TCP_CONNTRACK_IGNORE;
 * neither is one of the ten state columns of the table.
 */
109 #define sNO TCP_CONNTRACK_NONE
110 #define sSS TCP_CONNTRACK_SYN_SENT
111 #define sSR TCP_CONNTRACK_SYN_RECV
112 #define sES TCP_CONNTRACK_ESTABLISHED
113 #define sFW TCP_CONNTRACK_FIN_WAIT
114 #define sCW TCP_CONNTRACK_CLOSE_WAIT
115 #define sLA TCP_CONNTRACK_LAST_ACK
116 #define sTW TCP_CONNTRACK_TIME_WAIT
117 #define sCL TCP_CONNTRACK_CLOSE
118 #define sLI TCP_CONNTRACK_LISTEN
119 #define sIV TCP_CONNTRACK_MAX
120 #define sIG TCP_CONNTRACK_IGNORE
121
122 /* What TCP flags are set from RST/SYN/FIN/ACK. */
enum tcp_bit_set {
        TCP_SYN_SET     = 0,    /* SYN without ACK */
        TCP_SYNACK_SET  = 1,    /* SYN together with ACK */
        TCP_FIN_SET     = 2,    /* FIN (no RST, no SYN) */
        TCP_ACK_SET     = 3,    /* ACK only */
        TCP_RST_SET     = 4,    /* RST, whatever else is set */
        TCP_NONE_SET    = 5,    /* none of RST/SYN/FIN/ACK */
};
131
132 /*
133  * The TCP state transition table needs a few words...
134  *
135  * We are the man in the middle. All the packets go through us
136  * but might get lost in transit to the destination.
137  * It is assumed that the destinations can't receive segments
138  * we haven't seen.
139  *
140  * The checked segment is in window, but our windows are *not*
141  * equivalent with the ones of the sender/receiver. We always
142  * try to guess the state of the current sender.
143  *
144  * The meaning of the states are:
145  *
146  * NONE:        initial state
147  * SYN_SENT:    SYN-only packet seen
148  * SYN_RECV:    SYN-ACK packet seen
149  * ESTABLISHED: ACK packet seen
150  * FIN_WAIT:    FIN packet seen
151  * CLOSE_WAIT:  ACK seen (after FIN)
152  * LAST_ACK:    FIN seen (after FIN)
153  * TIME_WAIT:   last ACK seen
154  * CLOSE:       closed connection
155  *
156  * LISTEN state is not used.
157  *
158  * Packets marked as IGNORED (sIG):
159  *      if they may be either invalid or valid
160  *      and the receiver may send back a connection
161  *      closing RST or a SYN/ACK.
162  *
163  * Packets marked as INVALID (sIV):
164  *      if they are invalid
165  *      or we do not support the request (simultaneous open)
166  */
167 static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
168         {
169 /* ORIGINAL */
170 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
171 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
172 /*
173  *      sNO -> sSS      Initialize a new connection
174  *      sSS -> sSS      Retransmitted SYN
175  *      sSR -> sIG      Late retransmitted SYN?
176  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
177  *                      are errors. Receiver will reply with RST
178  *                      and close the connection.
179  *                      Or we are not in sync and hold a dead connection.
180  *      sFW -> sIG
181  *      sCW -> sIG
182  *      sLA -> sIG
183  *      sTW -> sSS      Reopened connection (RFC 1122).
184  *      sCL -> sSS
185  */
186 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
187 /*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
188 /*
189  * A SYN/ACK from the client is always invalid:
190  *      - either it tries to set up a simultaneous open, which is
191  *        not supported;
192  *      - or the firewall has just been inserted between the two hosts
193  *        during the session set-up. The SYN will be retransmitted
194  *        by the true client (or it'll time out).
195  */
196 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
197 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
198 /*
199  *      sNO -> sIV      Too late and no reason to do anything...
200  *      sSS -> sIV      Client migth not send FIN in this state:
201  *                      we enforce waiting for a SYN/ACK reply first.
202  *      sSR -> sFW      Close started.
203  *      sES -> sFW
204  *      sFW -> sLA      FIN seen in both directions, waiting for
205  *                      the last ACK.
206  *                      Migth be a retransmitted FIN as well...
207  *      sCW -> sLA
208  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
209  *      sTW -> sTW
210  *      sCL -> sCL
211  */
212 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
213 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
214 /*
215  *      sNO -> sES      Assumed.
216  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
217  *      sSR -> sES      Established state is reached.
218  *      sES -> sES      :-)
219  *      sFW -> sCW      Normal close request answered by ACK.
220  *      sCW -> sCW
221  *      sLA -> sTW      Last ACK detected.
222  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
223  *      sCL -> sCL
224  */
225 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
226 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
227 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
228         },
229         {
230 /* REPLY */
231 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
232 /*syn*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
233 /*
234  *      sNO -> sIV      Never reached.
235  *      sSS -> sIV      Simultaneous open, not supported
236  *      sSR -> sIV      Simultaneous open, not supported.
237  *      sES -> sIV      Server may not initiate a connection.
238  *      sFW -> sIV
239  *      sCW -> sIV
240  *      sLA -> sIV
241  *      sTW -> sIV      Reopened connection, but server may not do it.
242  *      sCL -> sIV
243  */
244 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
245 /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
246 /*
247  *      sSS -> sSR      Standard open.
248  *      sSR -> sSR      Retransmitted SYN/ACK.
249  *      sES -> sIG      Late retransmitted SYN/ACK?
250  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
251  *      sCW -> sIG
252  *      sLA -> sIG
253  *      sTW -> sIG
254  *      sCL -> sIG
255  */
256 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
257 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
258 /*
259  *      sSS -> sIV      Server might not send FIN in this state.
260  *      sSR -> sFW      Close started.
261  *      sES -> sFW
262  *      sFW -> sLA      FIN seen in both directions.
263  *      sCW -> sLA
264  *      sLA -> sLA      Retransmitted FIN.
265  *      sTW -> sTW
266  *      sCL -> sCL
267  */
268 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
269 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
270 /*
271  *      sSS -> sIG      Might be a half-open connection.
272  *      sSR -> sSR      Might answer late resent SYN.
273  *      sES -> sES      :-)
274  *      sFW -> sCW      Normal close request answered by ACK.
275  *      sCW -> sCW
276  *      sLA -> sTW      Last ACK detected.
277  *      sTW -> sTW      Retransmitted last ACK.
278  *      sCL -> sCL
279  */
280 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
281 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
282 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
283         }
284 };
285
286 static int tcp_pkt_to_tuple(const struct sk_buff *skb,
287                             unsigned int dataoff,
288                             struct ip_conntrack_tuple *tuple)
289 {
290         struct tcphdr _hdr, *hp;
291
292         /* Actually only need first 8 bytes. */
293         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
294         if (hp == NULL)
295                 return 0;
296
297         tuple->src.u.tcp.port = hp->source;
298         tuple->dst.u.tcp.port = hp->dest;
299
300         return 1;
301 }
302
303 static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
304                             const struct ip_conntrack_tuple *orig)
305 {
306         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
307         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
308         return 1;
309 }
310
311 /* Print out the per-protocol part of the tuple. */
312 static int tcp_print_tuple(struct seq_file *s,
313                            const struct ip_conntrack_tuple *tuple)
314 {
315         return seq_printf(s, "sport=%hu dport=%hu ",
316                           ntohs(tuple->src.u.tcp.port),
317                           ntohs(tuple->dst.u.tcp.port));
318 }
319
320 /* Print out the private part of the conntrack. */
321 static int tcp_print_conntrack(struct seq_file *s,
322                                const struct ip_conntrack *conntrack)
323 {
324         enum tcp_conntrack state;
325
326         read_lock_bh(&tcp_lock);
327         state = conntrack->proto.tcp.state;
328         read_unlock_bh(&tcp_lock);
329
330         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
331 }
332
333 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
334     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
335 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
336                          const struct ip_conntrack *ct)
337 {
338         struct nfattr *nest_parms;
339
340         read_lock_bh(&tcp_lock);
341         nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
342         NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
343                 &ct->proto.tcp.state);
344         read_unlock_bh(&tcp_lock);
345
346         NFA_NEST_END(skb, nest_parms);
347
348         return 0;
349
350 nfattr_failure:
351         read_unlock_bh(&tcp_lock);
352         return -1;
353 }
354
/*
 * Minimum payload size of each nested CTA_PROTOINFO_TCP_* attribute,
 * indexed by attribute type minus one; handed to nfattr_bad_size() in
 * nfattr_to_tcp().  Only the state attribute (one byte) has an entry.
 */
355 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
356         [CTA_PROTOINFO_TCP_STATE-1]     = sizeof(u_int8_t),
357 };
358
359 static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
360 {
361         struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
362         struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
363
364         /* updates could not contain anything about the private
365          * protocol info, in that case skip the parsing */
366         if (!attr)
367                 return 0;
368
369         nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
370
371         if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
372                 return -EINVAL;
373
374         if (!tb[CTA_PROTOINFO_TCP_STATE-1])
375                 return -EINVAL;
376
377         write_lock_bh(&tcp_lock);
378         ct->proto.tcp.state =
379                 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
380         write_unlock_bh(&tcp_lock);
381
382         return 0;
383 }
384 #endif
385
386 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
387 {
388         if (tcph->rst) return TCP_RST_SET;
389         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
390         else if (tcph->fin) return TCP_FIN_SET;
391         else if (tcph->ack) return TCP_ACK_SET;
392         else return TCP_NONE_SET;
393 }
394
395 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
396    in IP Filter' by Guido van Rooij.
397
398    http://www.nluug.nl/events/sane2000/papers.html
399    http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
400
401    The boundaries and the conditions are changed according to RFC793:
402    the packet must intersect the window (i.e. segments may be
403    after the right or before the left edge) and thus receivers may ACK
404    segments after the right edge of the window.
405
406         td_maxend = max(sack + max(win,1)) seen in reply packets
407         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
408         td_maxwin += seq + len - sender.td_maxend
409                         if seq + len > sender.td_maxend
410         td_end    = max(seq + len) seen in sent packets
411
412    I.   Upper bound for valid data:     seq <= sender.td_maxend
413    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
414    III. Upper bound for valid ack:      sack <= receiver.td_end
415    IV.  Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
416
417    where sack is the highest right edge of sack block found in the packet.
418
419    The upper bound limit for a valid ack is not ignored -
420    we don't have to deal with fragments.
421 */
422
423 static inline __u32 segment_seq_plus_len(__u32 seq,
424                                          size_t len,
425                                          struct iphdr *iph,
426                                          struct tcphdr *tcph)
427 {
428         return (seq + len - (iph->ihl + tcph->doff)*4
429                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
430 }
431
432 /* Fixme: what about big packets? */
/*
 * MAXACKWINDOW(sender) yields the larger of sender->td_maxwin and
 * MAXACKWINCONST.  tcp_in_window() uses it for the lower bound of an
 * acceptable ACK.  The macro evaluates its argument more than once, so
 * pass an expression without side effects.
 */
433 #define MAXACKWINCONST                  66000
434 #define MAXACKWINDOW(sender)                                            \
435         ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
436                                               : MAXACKWINCONST)
437
438 /*
439  * Simplified tcp_parse_options routine from tcp_input.c
440  */
441 static void tcp_options(const struct sk_buff *skb,
442                         struct iphdr *iph,
443                         struct tcphdr *tcph,
444                         struct ip_ct_tcp_state *state)
445 {
446         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
447         unsigned char *ptr;
448         int length = (tcph->doff*4) - sizeof(struct tcphdr);
449
450         if (!length)
451                 return;
452
453         ptr = skb_header_pointer(skb,
454                                  (iph->ihl * 4) + sizeof(struct tcphdr),
455                                  length, buff);
456         BUG_ON(ptr == NULL);
457
458         state->td_scale =
459         state->flags = 0;
460
461         while (length > 0) {
462                 int opcode=*ptr++;
463                 int opsize;
464
465                 switch (opcode) {
466                 case TCPOPT_EOL:
467                         return;
468                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
469                         length--;
470                         continue;
471                 default:
472                         opsize=*ptr++;
473                         if (opsize < 2) /* "silly options" */
474                                 return;
475                         if (opsize > length)
476                                 break;  /* don't parse partial options */
477
478                         if (opcode == TCPOPT_SACK_PERM
479                             && opsize == TCPOLEN_SACK_PERM)
480                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
481                         else if (opcode == TCPOPT_WINDOW
482                                  && opsize == TCPOLEN_WINDOW) {
483                                 state->td_scale = *(u_int8_t *)ptr;
484
485                                 if (state->td_scale > 14) {
486                                         /* See RFC1323 */
487                                         state->td_scale = 14;
488                                 }
489                                 state->flags |=
490                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
491                         }
492                         ptr += opsize - 2;
493                         length -= opsize;
494                 }
495         }
496 }
497
498 static void tcp_sack(const struct sk_buff *skb,
499                      struct iphdr *iph,
500                      struct tcphdr *tcph,
501                      __u32 *sack)
502 {
503         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
504         unsigned char *ptr;
505         int length = (tcph->doff*4) - sizeof(struct tcphdr);
506         __u32 tmp;
507
508         if (!length)
509                 return;
510
511         ptr = skb_header_pointer(skb,
512                                  (iph->ihl * 4) + sizeof(struct tcphdr),
513                                  length, buff);
514         BUG_ON(ptr == NULL);
515
516         /* Fast path for timestamp-only option */
517         if (length == TCPOLEN_TSTAMP_ALIGNED*4
518             && *(__be32 *)ptr ==
519                 __constant_htonl((TCPOPT_NOP << 24)
520                                  | (TCPOPT_NOP << 16)
521                                  | (TCPOPT_TIMESTAMP << 8)
522                                  | TCPOLEN_TIMESTAMP))
523                 return;
524
525         while (length > 0) {
526                 int opcode=*ptr++;
527                 int opsize, i;
528
529                 switch (opcode) {
530                 case TCPOPT_EOL:
531                         return;
532                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
533                         length--;
534                         continue;
535                 default:
536                         opsize=*ptr++;
537                         if (opsize < 2) /* "silly options" */
538                                 return;
539                         if (opsize > length)
540                                 break;  /* don't parse partial options */
541
542                         if (opcode == TCPOPT_SACK
543                             && opsize >= (TCPOLEN_SACK_BASE
544                                           + TCPOLEN_SACK_PERBLOCK)
545                             && !((opsize - TCPOLEN_SACK_BASE)
546                                  % TCPOLEN_SACK_PERBLOCK)) {
547                                 for (i = 0;
548                                      i < (opsize - TCPOLEN_SACK_BASE);
549                                      i += TCPOLEN_SACK_PERBLOCK) {
550                                         tmp = ntohl(*((__be32 *)(ptr+i)+1));
551
552                                         if (after(tmp, *sack))
553                                                 *sack = tmp;
554                                 }
555                                 return;
556                         }
557                         ptr += opsize - 2;
558                         length -= opsize;
559                 }
560         }
561 }
562
563 static int tcp_in_window(struct ip_ct_tcp *state,
564                          enum ip_conntrack_dir dir,
565                          unsigned int index,
566                          const struct sk_buff *skb,
567                          struct iphdr *iph,
568                          struct tcphdr *tcph)
569 {
570         struct ip_ct_tcp_state *sender = &state->seen[dir];
571         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
572         __u32 seq, ack, sack, end, win, swin;
573         int res;
574
575         /*
576          * Get the required data from the packet.
577          */
578         seq = ntohl(tcph->seq);
579         ack = sack = ntohl(tcph->ack_seq);
580         win = ntohs(tcph->window);
581         end = segment_seq_plus_len(seq, skb->len, iph, tcph);
582
583         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
584                 tcp_sack(skb, iph, tcph, &sack);
585
586         DEBUGP("tcp_in_window: START\n");
587         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
588                "seq=%u ack=%u sack=%u win=%u end=%u\n",
589                 NIPQUAD(iph->saddr), ntohs(tcph->source),
590                 NIPQUAD(iph->daddr), ntohs(tcph->dest),
591                 seq, ack, sack, win, end);
592         DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
593                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
594                 sender->td_end, sender->td_maxend, sender->td_maxwin,
595                 sender->td_scale,
596                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
597                 receiver->td_scale);
598
599         if (sender->td_end == 0) {
600                 /*
601                  * Initialize sender data.
602                  */
603                 if (tcph->syn && tcph->ack) {
604                         /*
605                          * Outgoing SYN-ACK in reply to a SYN.
606                          */
607                         sender->td_end =
608                         sender->td_maxend = end;
609                         sender->td_maxwin = (win == 0 ? 1 : win);
610
611                         tcp_options(skb, iph, tcph, sender);
612                         /*
613                          * RFC 1323:
614                          * Both sides must send the Window Scale option
615                          * to enable window scaling in either direction.
616                          */
617                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
618                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
619                                 sender->td_scale =
620                                 receiver->td_scale = 0;
621                 } else {
622                         /*
623                          * We are in the middle of a connection,
624                          * its history is lost for us.
625                          * Let's try to use the data from the packet.
626                          */
627                         sender->td_end = end;
628                         sender->td_maxwin = (win == 0 ? 1 : win);
629                         sender->td_maxend = end + sender->td_maxwin;
630                 }
631         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
632                      && dir == IP_CT_DIR_ORIGINAL)
633                     || (state->state == TCP_CONNTRACK_SYN_RECV
634                         && dir == IP_CT_DIR_REPLY))
635                     && after(end, sender->td_end)) {
636                 /*
637                  * RFC 793: "if a TCP is reinitialized ... then it need
638                  * not wait at all; it must only be sure to use sequence
639                  * numbers larger than those recently used."
640                  */
641                 sender->td_end =
642                 sender->td_maxend = end;
643                 sender->td_maxwin = (win == 0 ? 1 : win);
644
645                 tcp_options(skb, iph, tcph, sender);
646         }
647
648         if (!(tcph->ack)) {
649                 /*
650                  * If there is no ACK, just pretend it was set and OK.
651                  */
652                 ack = sack = receiver->td_end;
653         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
654                     (TCP_FLAG_ACK|TCP_FLAG_RST))
655                    && (ack == 0)) {
656                 /*
657                  * Broken TCP stacks, that set ACK in RST packets as well
658                  * with zero ack value.
659                  */
660                 ack = sack = receiver->td_end;
661         }
662
663         if (seq == end
664             && (!tcph->rst
665                 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
666                 /*
667                  * Packets contains no data: we assume it is valid
668                  * and check the ack value only.
669                  * However RST segments are always validated by their
670                  * SEQ number, except when seq == 0 (reset sent answering
671                  * SYN.
672                  */
673                 seq = end = sender->td_end;
674
675         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
676                "seq=%u ack=%u sack =%u win=%u end=%u\n",
677                 NIPQUAD(iph->saddr), ntohs(tcph->source),
678                 NIPQUAD(iph->daddr), ntohs(tcph->dest),
679                 seq, ack, sack, win, end);
680         DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
681                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
682                 sender->td_end, sender->td_maxend, sender->td_maxwin,
683                 sender->td_scale,
684                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
685                 receiver->td_scale);
686
687         DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
688                 before(seq, sender->td_maxend + 1),
689                 after(end, sender->td_end - receiver->td_maxwin - 1),
690                 before(sack, receiver->td_end + 1),
691                 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
692
693         if (before(seq, sender->td_maxend + 1) &&
694             after(end, sender->td_end - receiver->td_maxwin - 1) &&
695             before(sack, receiver->td_end + 1) &&
696             after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
697                 /*
698                  * Take into account window scaling (RFC 1323).
699                  */
700                 if (!tcph->syn)
701                         win <<= sender->td_scale;
702
703                 /*
704                  * Update sender data.
705                  */
706                 swin = win + (sack - ack);
707                 if (sender->td_maxwin < swin)
708                         sender->td_maxwin = swin;
709                 if (after(end, sender->td_end))
710                         sender->td_end = end;
711                 /*
712                  * Update receiver data.
713                  */
714                 if (after(end, sender->td_maxend))
715                         receiver->td_maxwin += end - sender->td_maxend;
716                 if (after(sack + win, receiver->td_maxend - 1)) {
717                         receiver->td_maxend = sack + win;
718                         if (win == 0)
719                                 receiver->td_maxend++;
720                 }
721
722                 /*
723                  * Check retransmissions.
724                  */
725                 if (index == TCP_ACK_SET) {
726                         if (state->last_dir == dir
727                             && state->last_seq == seq
728                             && state->last_ack == ack
729                             && state->last_end == end
730                             && state->last_win == win)
731                                 state->retrans++;
732                         else {
733                                 state->last_dir = dir;
734                                 state->last_seq = seq;
735                                 state->last_ack = ack;
736                                 state->last_end = end;
737                                 state->last_win = win;
738                                 state->retrans = 0;
739                         }
740                 }
741                 res = 1;
742         } else {
743                 res = 0;
744                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
745                     ip_ct_tcp_be_liberal)
746                         res = 1;
747                 if (!res && LOG_INVALID(IPPROTO_TCP))
748                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
749                         "ip_ct_tcp: %s ",
750                         before(seq, sender->td_maxend + 1) ?
751                         after(end, sender->td_end - receiver->td_maxwin - 1) ?
752                         before(sack, receiver->td_end + 1) ?
753                         after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
754                         : "ACK is under the lower bound (possible overly delayed ACK)"
755                         : "ACK is over the upper bound (ACKed data not seen yet)"
756                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
757                         : "SEQ is over the upper bound (over the window of the receiver)");
758         }
759
760         DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
761                "receiver end=%u maxend=%u maxwin=%u\n",
762                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
763                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
764
765         return res;
766 }
767
768 #ifdef CONFIG_IP_NF_NAT_NEEDED
769 /* Update sender->td_end after NAT successfully mangled the packet */
770 void ip_conntrack_tcp_update(struct sk_buff *skb,
771                              struct ip_conntrack *conntrack,
772                              enum ip_conntrack_dir dir)
773 {
774         struct iphdr *iph = skb->nh.iph;
775         struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
776         __u32 end;
777 #ifdef DEBUGP_VARS
778         struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
779         struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
780 #endif
781
782         end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
783
784         write_lock_bh(&tcp_lock);
785         /*
786          * We have to worry for the ack in the reply packet only...
787          */
788         if (after(end, conntrack->proto.tcp.seen[dir].td_end))
789                 conntrack->proto.tcp.seen[dir].td_end = end;
790         conntrack->proto.tcp.last_end = end;
791         write_unlock_bh(&tcp_lock);
792         DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
793                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
794                 sender->td_end, sender->td_maxend, sender->td_maxwin,
795                 sender->td_scale,
796                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
797                 receiver->td_scale);
798 }
799
800 #endif
801
/*
 * TCP header flag bits, one per bit position.  tcp_error() reads them from
 * byte 13 of the TCP header and uses them to index tcp_valid_flags[].
 */
#define TH_FIN  (1 << 0)
#define TH_SYN  (1 << 1)
#define TH_RST  (1 << 2)
#define TH_PUSH (1 << 3)
#define TH_ACK  (1 << 4)
#define TH_URG  (1 << 5)
#define TH_ECE  (1 << 6)
#define TH_CWR  (1 << 7)
810
811 /* table of valid flag combinations - ECE and CWR are always valid */
812 static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
813 {
814         [TH_SYN]                        = 1,
815         [TH_SYN|TH_PUSH]                = 1,
816         [TH_SYN|TH_URG]                 = 1,
817         [TH_SYN|TH_PUSH|TH_URG]         = 1,
818         [TH_SYN|TH_ACK]                 = 1,
819         [TH_SYN|TH_ACK|TH_PUSH]         = 1,
820         [TH_RST]                        = 1,
821         [TH_RST|TH_ACK]                 = 1,
822         [TH_RST|TH_ACK|TH_PUSH]         = 1,
823         [TH_FIN|TH_ACK]                 = 1,
824         [TH_ACK]                        = 1,
825         [TH_ACK|TH_PUSH]                = 1,
826         [TH_ACK|TH_URG]                 = 1,
827         [TH_ACK|TH_URG|TH_PUSH]         = 1,
828         [TH_FIN|TH_ACK|TH_PUSH]         = 1,
829         [TH_FIN|TH_ACK|TH_URG]          = 1,
830         [TH_FIN|TH_ACK|TH_URG|TH_PUSH]  = 1,
831 };
832
833 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
834 static int tcp_error(struct sk_buff *skb,
835                      enum ip_conntrack_info *ctinfo,
836                      unsigned int hooknum)
837 {
838         struct iphdr *iph = skb->nh.iph;
839         struct tcphdr _tcph, *th;
840         unsigned int tcplen = skb->len - iph->ihl * 4;
841         u_int8_t tcpflags;
842
843         /* Smaller that minimal TCP header? */
844         th = skb_header_pointer(skb, iph->ihl * 4,
845                                 sizeof(_tcph), &_tcph);
846         if (th == NULL) {
847                 if (LOG_INVALID(IPPROTO_TCP))
848                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
849                                 "ip_ct_tcp: short packet ");
850                 return -NF_ACCEPT;
851         }
852
853         /* Not whole TCP header or malformed packet */
854         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
855                 if (LOG_INVALID(IPPROTO_TCP))
856                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
857                                 "ip_ct_tcp: truncated/malformed packet ");
858                 return -NF_ACCEPT;
859         }
860
861         /* Checksum invalid? Ignore.
862          * We skip checking packets on the outgoing path
863          * because it is assumed to be correct.
864          */
865         /* FIXME: Source route IP option packets --RR */
866         if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
867             nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
868                 if (LOG_INVALID(IPPROTO_TCP))
869                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
870                                   "ip_ct_tcp: bad TCP checksum ");
871                 return -NF_ACCEPT;
872         }
873
874         /* Check TCP flags. */
875         tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
876         if (!tcp_valid_flags[tcpflags]) {
877                 if (LOG_INVALID(IPPROTO_TCP))
878                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
879                                   "ip_ct_tcp: invalid TCP flag combination ");
880                 return -NF_ACCEPT;
881         }
882
883         return NF_ACCEPT;
884 }
885
886 /* Returns verdict for packet, or -1 for invalid. */
887 static int tcp_packet(struct ip_conntrack *conntrack,
888                       const struct sk_buff *skb,
889                       enum ip_conntrack_info ctinfo)
890 {
891         enum tcp_conntrack new_state, old_state;
892         enum ip_conntrack_dir dir;
893         struct iphdr *iph = skb->nh.iph;
894         struct tcphdr *th, _tcph;
895         unsigned long timeout;
896         unsigned int index;
897
898         th = skb_header_pointer(skb, iph->ihl * 4,
899                                 sizeof(_tcph), &_tcph);
900         BUG_ON(th == NULL);
901
902         write_lock_bh(&tcp_lock);
903         old_state = conntrack->proto.tcp.state;
904         dir = CTINFO2DIR(ctinfo);
905         index = get_conntrack_index(th);
906         new_state = tcp_conntracks[dir][index][old_state];
907
908         switch (new_state) {
909         case TCP_CONNTRACK_IGNORE:
910                 /* Ignored packets:
911                  *
912                  * a) SYN in ORIGINAL
913                  * b) SYN/ACK in REPLY
914                  * c) ACK in reply direction after initial SYN in original.
915                  */
916                 if (index == TCP_SYNACK_SET
917                     && conntrack->proto.tcp.last_index == TCP_SYN_SET
918                     && conntrack->proto.tcp.last_dir != dir
919                     && ntohl(th->ack_seq) ==
920                              conntrack->proto.tcp.last_end) {
921                         /* This SYN/ACK acknowledges a SYN that we earlier
922                          * ignored as invalid. This means that the client and
923                          * the server are both in sync, while the firewall is
924                          * not. We kill this session and block the SYN/ACK so
925                          * that the client cannot but retransmit its SYN and
926                          * thus initiate a clean new session.
927                          */
928                         write_unlock_bh(&tcp_lock);
929                         if (LOG_INVALID(IPPROTO_TCP))
930                                 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
931                                               NULL, "ip_ct_tcp: "
932                                               "killing out of sync session ");
933                         if (del_timer(&conntrack->timeout))
934                                 conntrack->timeout.function((unsigned long)
935                                                             conntrack);
936                         return -NF_DROP;
937                 }
938                 conntrack->proto.tcp.last_index = index;
939                 conntrack->proto.tcp.last_dir = dir;
940                 conntrack->proto.tcp.last_seq = ntohl(th->seq);
941                 conntrack->proto.tcp.last_end =
942                     segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
943
944                 write_unlock_bh(&tcp_lock);
945                 if (LOG_INVALID(IPPROTO_TCP))
946                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
947                                   "ip_ct_tcp: invalid packet ignored ");
948                 return NF_ACCEPT;
949         case TCP_CONNTRACK_MAX:
950                 /* Invalid packet */
951                 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
952                        dir, get_conntrack_index(th),
953                        old_state);
954                 write_unlock_bh(&tcp_lock);
955                 if (LOG_INVALID(IPPROTO_TCP))
956                         nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
957                                   "ip_ct_tcp: invalid state ");
958                 return -NF_ACCEPT;
959         case TCP_CONNTRACK_SYN_SENT:
960                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
961                         break;
962                 if ((conntrack->proto.tcp.seen[dir].flags &
963                          IP_CT_TCP_FLAG_CLOSE_INIT)
964                     || after(ntohl(th->seq),
965                              conntrack->proto.tcp.seen[dir].td_end)) {
966                         /* Attempt to reopen a closed connection.
967                         * Delete this connection and look up again. */
968                         write_unlock_bh(&tcp_lock);
969                         if (del_timer(&conntrack->timeout))
970                                 conntrack->timeout.function((unsigned long)
971                                                             conntrack);
972                         return -NF_REPEAT;
973                 } else {
974                         write_unlock_bh(&tcp_lock);
975                         if (LOG_INVALID(IPPROTO_TCP))
976                                 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
977                                               NULL, "ip_ct_tcp: invalid SYN");
978                         return -NF_ACCEPT;
979                 }
980         case TCP_CONNTRACK_CLOSE:
981                 if (index == TCP_RST_SET
982                     && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
983                          && conntrack->proto.tcp.last_index == TCP_SYN_SET)
984                         || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
985                             && conntrack->proto.tcp.last_index == TCP_ACK_SET))
986                     && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
987                         /* RST sent to invalid SYN or ACK we had let through
988                          * at a) and c) above:
989                          *
990                          * a) SYN was in window then
991                          * c) we hold a half-open connection.
992                          *
993                          * Delete our connection entry.
994                          * We skip window checking, because packet might ACK
995                          * segments we ignored. */
996                         goto in_window;
997                 }
998                 /* Just fall through */
999         default:
1000                 /* Keep compilers happy. */
1001                 break;
1002         }
1003
1004         if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
1005                            skb, iph, th)) {
1006                 write_unlock_bh(&tcp_lock);
1007                 return -NF_ACCEPT;
1008         }
1009     in_window:
1010         /* From now on we have got in-window packets */
1011         conntrack->proto.tcp.last_index = index;
1012
1013         DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
1014                "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1015                 NIPQUAD(iph->saddr), ntohs(th->source),
1016                 NIPQUAD(iph->daddr), ntohs(th->dest),
1017                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1018                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1019                 old_state, new_state);
1020
1021         conntrack->proto.tcp.state = new_state;
1022         if (old_state != new_state
1023             && (new_state == TCP_CONNTRACK_FIN_WAIT
1024                 || new_state == TCP_CONNTRACK_CLOSE))
1025                 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1026         timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
1027                   && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
1028                   ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
1029         write_unlock_bh(&tcp_lock);
1030
1031         ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
1032         if (new_state != old_state)
1033                 ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
1034
1035         if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
1036                 /* If only reply is a RST, we can consider ourselves not to
1037                    have an established connection: this is a fairly common
1038                    problem case, so we can delete the conntrack
1039                    immediately.  --RR */
1040                 if (th->rst) {
1041                         if (del_timer(&conntrack->timeout))
1042                                 conntrack->timeout.function((unsigned long)
1043                                                             conntrack);
1044                         return NF_ACCEPT;
1045                 }
1046         } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
1047                    && (old_state == TCP_CONNTRACK_SYN_RECV
1048                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1049                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1050                 /* Set ASSURED if we see see valid ack in ESTABLISHED
1051                    after SYN_RECV or a valid answer for a picked up
1052                    connection. */
1053                 set_bit(IPS_ASSURED_BIT, &conntrack->status);
1054                 ip_conntrack_event_cache(IPCT_STATUS, skb);
1055         }
1056         ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
1057
1058         return NF_ACCEPT;
1059 }
1060
1061 /* Called when a new connection for this protocol found. */
1062 static int tcp_new(struct ip_conntrack *conntrack,
1063                    const struct sk_buff *skb)
1064 {
1065         enum tcp_conntrack new_state;
1066         struct iphdr *iph = skb->nh.iph;
1067         struct tcphdr *th, _tcph;
1068 #ifdef DEBUGP_VARS
1069         struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1070         struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1071 #endif
1072
1073         th = skb_header_pointer(skb, iph->ihl * 4,
1074                                 sizeof(_tcph), &_tcph);
1075         BUG_ON(th == NULL);
1076
1077         /* Don't need lock here: this conntrack not in circulation yet */
1078         new_state
1079                 = tcp_conntracks[0][get_conntrack_index(th)]
1080                 [TCP_CONNTRACK_NONE];
1081
1082         /* Invalid: delete conntrack */
1083         if (new_state >= TCP_CONNTRACK_MAX) {
1084                 DEBUGP("ip_ct_tcp: invalid new deleting.\n");
1085                 return 0;
1086         }
1087
1088         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1089                 /* SYN packet */
1090                 conntrack->proto.tcp.seen[0].td_end =
1091                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1092                                              iph, th);
1093                 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1094                 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1095                         conntrack->proto.tcp.seen[0].td_maxwin = 1;
1096                 conntrack->proto.tcp.seen[0].td_maxend =
1097                         conntrack->proto.tcp.seen[0].td_end;
1098
1099                 tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
1100                 conntrack->proto.tcp.seen[1].flags = 0;
1101         } else if (ip_ct_tcp_loose == 0) {
1102                 /* Don't try to pick up connections. */
1103                 return 0;
1104         } else {
1105                 /*
1106                  * We are in the middle of a connection,
1107                  * its history is lost for us.
1108                  * Let's try to use the data from the packet.
1109                  */
1110                 conntrack->proto.tcp.seen[0].td_end =
1111                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1112                                              iph, th);
1113                 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1114                 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1115                         conntrack->proto.tcp.seen[0].td_maxwin = 1;
1116                 conntrack->proto.tcp.seen[0].td_maxend =
1117                         conntrack->proto.tcp.seen[0].td_end +
1118                         conntrack->proto.tcp.seen[0].td_maxwin;
1119                 conntrack->proto.tcp.seen[0].td_scale = 0;
1120
1121                 /* We assume SACK and liberal window checking to handle
1122                  * window scaling */
1123                 conntrack->proto.tcp.seen[0].flags =
1124                 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1125                                                      IP_CT_TCP_FLAG_BE_LIBERAL;
1126         }
1127
1128         conntrack->proto.tcp.seen[1].td_end = 0;
1129         conntrack->proto.tcp.seen[1].td_maxend = 0;
1130         conntrack->proto.tcp.seen[1].td_maxwin = 1;
1131         conntrack->proto.tcp.seen[1].td_scale = 0;
1132
1133         /* tcp_packet will set them */
1134         conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1135         conntrack->proto.tcp.last_index = TCP_NONE_SET;
1136
1137         DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1138                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1139                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1140                 sender->td_scale,
1141                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1142                 receiver->td_scale);
1143         return 1;
1144 }
1145
1146 struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
1147 {
1148         .proto                  = IPPROTO_TCP,
1149         .name                   = "tcp",
1150         .pkt_to_tuple           = tcp_pkt_to_tuple,
1151         .invert_tuple           = tcp_invert_tuple,
1152         .print_tuple            = tcp_print_tuple,
1153         .print_conntrack        = tcp_print_conntrack,
1154         .packet                 = tcp_packet,
1155         .new                    = tcp_new,
1156         .error                  = tcp_error,
1157 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1158     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
1159         .to_nfattr              = tcp_to_nfattr,
1160         .from_nfattr            = nfattr_to_tcp,
1161         .tuple_to_nfattr        = ip_ct_port_tuple_to_nfattr,
1162         .nfattr_to_tuple        = ip_ct_port_nfattr_to_tuple,
1163 #endif
1164 };