X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_output.c;h=cb1130cbf0cfd31ec256b5bfb94259569ebf9440;hb=a0eee8681a9bc21d3f83260a8ceb3fa30236b189;hp=63170e29754003c78b2cce548425ae1710c317c4;hpb=653f42f6b6348652c02737924abd6a5a6426e7ee;p=pandora-kernel.git diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 63170e297540..cb1130cbf0cf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -220,7 +220,8 @@ void tcp_select_initial_window(int __space, __u32 mss, /* Set window scaling on max possible window * See RFC1323 for an explanation of the limit to 14 */ - space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; @@ -835,11 +836,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) { + if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - skb->ooo_okay = 1; - } else - skb->ooo_okay = 0; + + /* if no packet is in qdisc/device queue, then allow XPS to select + * another queue. + */ + skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -931,6 +934,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1012,9 +1018,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -1093,6 +1097,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; + eat = min_t(int, len, skb_headlen(skb)); + if (eat) { + __skb_pull(skb, eat); + len -= eat; + if (!len) + return; + } eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -1124,11 +1135,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - /* If len == headlen, we avoid __skb_pull to preserve alignment. */ - if (unlikely(len < skb_headlen(skb))) - __skb_pull(skb, len); - else - __pskb_trim_head(skb, len - skb_headlen(skb)); + __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; @@ -1138,11 +1145,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - /* Any change of skb->len requires recalculation of tso - * factor and mss. - */ + /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); return 0; } @@ -1167,8 +1172,8 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - if (mss_now < 48) - mss_now = 48; + if (mss_now < TCP_MIN_SND_MSS) + mss_now = TCP_MIN_SND_MSS; /* Now subtract TCP options size, not including SACKs */ mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); @@ -1585,8 +1590,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return 1; @@ -1668,16 +1676,19 @@ static int tcp_mtu_probe(struct sock *sk) nskb->ip_summed = skb->ip_summed; tcp_insert_write_queue_before(nskb, skb, sk); + tcp_highest_sack_replace(sk, skb, nskb); len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. @@ -1913,9 +1924,11 @@ u32 __tcp_select_window(struct sock *sk) int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; - if (mss > full_space) + if (unlikely(mss > full_space)) { mss = full_space; - + if (mss <= 0) + return 0; + } if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -1975,7 +1988,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - tcp_highest_sack_combine(sk, next_skb, skb); + tcp_highest_sack_replace(sk, next_skb, skb); tcp_unlink_write_queue(next_skb, sk); @@ -2059,7 +2072,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, /* Punt if not enough space exists in the first SKB for * the data in the second */ - if (skb->len > skb_tailroom(to)) + if (skb->len > skb_availroom(to)) break; if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) @@ -2089,7 +2102,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { @@ -2120,6 +2134,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } @@ -2311,33 +2327,40 @@ begin_fwd: } } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || tcp_memory_pressure)) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } /* Reserve space for headers and prepare control bits. */ @@ -2347,7 +2370,7 @@ void tcp_send_fin(struct sock *sk) TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to @@ -2623,13 +2646,10 @@ int tcp_connect(struct sock *sk) tcp_connect_init(sk); - buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); - if (unlikely(buff == NULL)) + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + if (unlikely(!buff)) return -ENOBUFS; - /* Reserve space for headers. */ - skb_reserve(buff, MAX_TCP_HEADER); - tp->snd_nxt = tp->write_seq; tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); TCP_ECN_send_syn(sk, buff);