#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/slab.h>
/* Minimal RCV_MSS. */
#define TCP_MIN_RCVMSS 536U
+/* The least MTU to use for probing */
+#define TCP_BASE_MSS 512
+
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
+extern int sysctl_tcp_mtu_probing;
+extern int sysctl_tcp_base_mss;
+extern int sysctl_tcp_workaround_signed_windows;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
-/*
- * Pointers to address related TCP functions
- * (i.e. things that depend on the address family)
- */
-
-struct tcp_func {
- int (*queue_xmit) (struct sk_buff *skb,
- int ipfragok);
-
- void (*send_check) (struct sock *sk,
- struct tcphdr *th,
- int len,
- struct sk_buff *skb);
-
- int (*rebuild_header) (struct sock *sk);
-
- int (*conn_request) (struct sock *sk,
- struct sk_buff *skb);
-
- struct sock * (*syn_recv_sock) (struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst);
-
- int (*remember_stamp) (struct sock *sk);
-
- __u16 net_header_len;
-
- int (*setsockopt) (struct sock *sk,
- int level,
- int optname,
- char __user *optval,
- int optlen);
-
- int (*getsockopt) (struct sock *sk,
- int level,
- int optname,
- char __user *optval,
- int __user *optlen);
-
-
- void (*addr2sockaddr) (struct sock *sk,
- struct sockaddr *);
-
- int sockaddr_len;
-};
-
/*
* The next routines deal with comparing 32 bit unsigned ints
* and worry about wraparound (automatic with unsigned arithmetic).
extern void tcp_rcv_space_adjust(struct sock *sk);
+extern int tcp_twsk_unique(struct sock *sk,
+ struct sock *sktw, void *twp);
+
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
extern int tcp_setsockopt(struct sock *sk, int level,
int optname, char __user *optval,
int optlen);
+extern int compat_tcp_getsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int compat_tcp_setsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int optlen);
extern void tcp_set_keepalive(struct sock *sk, int val);
extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg,
* TCP v4 functions exported for the inet6 API
*/
-extern void tcp_v4_send_check(struct sock *sk,
- struct tcphdr *th, int len,
+extern void tcp_v4_send_check(struct sock *sk, int len,
struct sk_buff *skb);
extern int tcp_v4_conn_request(struct sock *sk,
extern void tcp_unhash(struct sock *sk);
-extern int tcp_v4_hash_connecting(struct sock *sk);
-
-
/* From syncookies.c */
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
-/* Initialize RCV_MSS value.
- * RCV_MSS is an our guess about MSS used by the peer.
- * We haven't any direct information about the MSS.
- * It's better to underestimate the RCV_MSS rather than overestimate.
- * Overestimations make us ACKing less frequently than needed.
- * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
- */
+extern void tcp_initialize_rcv_mss(struct sock *sk);
-static inline void tcp_initialize_rcv_mss(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
-
- hint = min(hint, tp->rcv_wnd/2);
- hint = min(hint, TCP_MIN_RCVMSS);
- hint = max(hint, TCP_MIN_MSS);
-
- inet_csk(sk)->icsk_ack.rcv_mss = hint;
-}
+extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
+extern int tcp_mss_to_mtu(struct sock *sk, int mss);
+extern void tcp_mtup_init(struct sock *sk);
-static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
tp->pred_flags = htonl((tp->tcp_header_len << 26) |
ntohl(TCP_FLAG_ACK) |
snd_wnd);
}
-static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
+static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}
* Rcv_nxt can be after the window if our peer push more data
* than the offered window.
*/
-static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp)
+static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
+extern void tcp_slow_start(struct tcp_sock *tp);
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
* "Packets left network, but not honestly ACKed yet" PLUS
* "Packets fast retransmitted"
*/
-static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
return (tp->packets_out - tp->left_out + tp->retrans_out);
}
(tp->snd_cwnd >> 2)));
}
-/*
- * Linear increase during slow start
- */
-static inline void tcp_slow_start(struct tcp_sock *tp)
-{
- if (sysctl_tcp_abc) {
- /* RFC3465: Slow Start
- * TCP sender SHOULD increase cwnd by the number of
- * previously unacknowledged bytes ACKed by each incoming
- * acknowledgment, provided the increase is not more than L
- */
- if (tp->bytes_acked < tp->mss_cache)
- return;
-
- /* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
- }
- tp->bytes_acked = 0;
-
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
-}
-
-
static inline void tcp_sync_left_out(struct tcp_sock *tp)
{
if (tp->rx_opt.sack_ok &&
tp->left_out = tp->sacked_out + tp->lost_out;
}
-/* Set slow start threshold and cwnd not falling to slow start */
-static inline void __tcp_enter_cwr(struct sock *sk)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- tp->undo_marker = 0;
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
- tp->snd_cwnd = min(tp->snd_cwnd,
- tcp_packets_in_flight(tp) + 1U);
- tp->snd_cwnd_cnt = 0;
- tp->high_seq = tp->snd_nxt;
- tp->snd_cwnd_stamp = tcp_time_stamp;
- TCP_ECN_queue_cwr(tp);
-}
-
-static inline void tcp_enter_cwr(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- tp->prior_ssthresh = 0;
- tp->bytes_acked = 0;
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
- __tcp_enter_cwr(sk);
- tcp_set_ca_state(sk, TCP_CA_CWR);
- }
-}
-
+extern void tcp_enter_cwr(struct sock *sk);
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
/* Slow start with delack produces 3 packets of burst, so that
return left <= tcp_max_burst(tp);
}
-static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
- const struct sk_buff *skb)
+static inline void tcp_minshall_update(struct tcp_sock *tp, int mss,
+ const struct sk_buff *skb)
{
if (skb->len < mss)
tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}
-static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
if (!tp->packets_out && !icsk->icsk_pending)
icsk->icsk_rto, TCP_RTO_MAX);
}
-static __inline__ void tcp_push_pending_frames(struct sock *sk,
- struct tcp_sock *tp)
+static inline void tcp_push_pending_frames(struct sock *sk,
+ struct tcp_sock *tp)
{
__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
}
-static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
tp->snd_wl1 = seq;
}
-static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
tp->snd_wl1 = seq;
}
/*
* Calculate(/check) TCP checksum
*/
-static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
- unsigned long saddr, unsigned long daddr,
- unsigned long base)
+static inline u16 tcp_v4_check(struct tcphdr *th, int len,
+ unsigned long saddr, unsigned long daddr,
+ unsigned long base)
{
return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
}
-static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
+static inline int __tcp_checksum_complete(struct sk_buff *skb)
{
return __skb_checksum_complete(skb);
}
-static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
+static inline int tcp_checksum_complete(struct sk_buff *skb)
{
return skb->ip_summed != CHECKSUM_UNNECESSARY &&
__tcp_checksum_complete(skb);
/* Prequeue for VJ style copy to user, combined with checksumming. */
-static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
+static inline void tcp_prequeue_init(struct tcp_sock *tp)
{
tp->ucopy.task = NULL;
tp->ucopy.len = 0;
*
* NOTE: is this not too big to inline?
*/
-static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
};
#endif
-static __inline__ void tcp_set_state(struct sock *sk, int state)
+static inline void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
#endif
}
-static __inline__ void tcp_done(struct sock *sk)
+static inline void tcp_done(struct sock *sk)
{
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
inet_csk_destroy_sock(sk);
}
-static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
+static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
rx_opt->dsack = 0;
rx_opt->eff_sacks = 0;
rx_opt->num_sacks = 0;
}
-static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
-{
- if (tp->rx_opt.tstamp_ok) {
- *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp);
- *ptr++ = htonl(tp->rx_opt.ts_recent);
- }
- if (tp->rx_opt.eff_sacks) {
- struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
- int this_sack;
-
- *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_SACK << 8) |
- (TCPOLEN_SACK_BASE +
- (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
- for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
- *ptr++ = htonl(sp[this_sack].start_seq);
- *ptr++ = htonl(sp[this_sack].end_seq);
- }
- if (tp->rx_opt.dsack) {
- tp->rx_opt.dsack = 0;
- tp->rx_opt.eff_sacks--;
- }
- }
-}
-
-/* Construct a tcp options header for a SYN or SYN_ACK packet.
- * If this is every changed make sure to change the definition of
- * MAX_SYN_SIZE to match the new maximum number of options that you
- * can generate.
- */
-static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
- int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
-{
- /* We always get an MSS option.
- * The option bytes which will be seen in normal data
- * packets should timestamps be used, must be in the MSS
- * advertised. But we subtract them from tp->mss_cache so
- * that calculations in tcp_sendmsg are simpler etc.
- * So account for this fact here if necessary. If we
- * don't do this correctly, as a receiver we won't
- * recognize data packets as being full sized when we
- * should, and thus we won't abide by the delayed ACK
- * rules correctly.
- * SACKs don't matter, we never delay an ACK when we
- * have any of those going out.
- */
- *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
- if (ts) {
- if(sack)
- *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
- (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- else
- *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp); /* TSVAL */
- *ptr++ = htonl(ts_recent); /* TSECR */
- } else if(sack)
- *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
- if (offer_wscale)
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
-}
-
/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
return tcp_win_from_space(sk->sk_rcvbuf);
}
-static __inline__ void tcp_openreq_init(struct request_sock *req,
- struct tcp_options_received *rx_opt,
- struct sk_buff *skb)
+static inline void tcp_openreq_init(struct request_sock *req,
+ struct tcp_options_received *rx_opt,
+ struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);