/* Set window scaling on max possible window
* See RFC1323 for an explanation of the limit to 14
*/
- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+ space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0) {
+ if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- skb->ooo_okay = 1;
- } else
- skb->ooo_okay = 0;
+
+ /* if no packet is in qdisc/device queue, then allow XPS to select
+ * another queue.
+ */
+ skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
+ /* Make sure we own this skb before messing gso_size/gso_segs */
+ WARN_ON_ONCE(skb_cloned(skb));
+
if (skb->len <= mss_now || !sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
if (nsize < 0)
nsize = 0;
- if (skb_cloned(skb) &&
- skb_is_nonlinear(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
/* Get a new skb... force flag on. */
eat = min_t(int, len, skb_headlen(skb));
if (eat) {
__skb_pull(skb, eat);
- skb->avail_size -= eat;
len -= eat;
if (!len)
return;
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
- unsigned int mss_now, unsigned int max_segs)
+ unsigned int mss_now, unsigned int cwnd)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, max_len;
+ u32 needed, window, cwnd_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- max_len = mss_now * max_segs;
+ cwnd_len = mss_now * cwnd;
- if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
- return max_len;
+ if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+ return cwnd_len;
needed = min(skb->len, window);
- if (max_len <= needed)
- return max_len;
+ if (cwnd_len <= needed)
+ return cwnd_len;
return needed - needed % mss_now;
}
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
- sk->sk_gso_max_segs * tp->mss_cache))
+ if (limit >= sk->sk_gso_max_size)
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
goto send_now;
}
- /* Ok, it looks like it is advisable to defer. */
- tp->tso_deferred = 1 | (jiffies << 1);
+ /* Ok, it looks like it is advisable to defer.
+ * Do not rearm the timer if already set to not break TCP ACK clocking.
+ */
+ if (!tp->tso_deferred)
+ tp->tso_deferred = 1 | (jiffies << 1);
return 1;
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed)
+ if (nskb->ip_summed) {
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- else
- nskb->csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, nskb->csum);
+ } else {
+ __wsum csum = skb_copy_and_csum_bits(skb, 0,
+ skb_put(nskb, copy),
+ copy, 0);
+ nskb->csum = csum_block_add(nskb->csum, csum, len);
+ }
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- min_t(unsigned int,
- cwnd_quota,
- sk->sk_gso_max_segs));
+ cwnd_quota);
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
int window;
- if (mss > full_space)
+ if (unlikely(mss > full_space)) {
mss = full_space;
-
+ if (mss <= 0)
+ return 0;
+ }
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- tcp_highest_sack_combine(sk, next_skb, skb);
+ tcp_highest_sack_replace(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
- min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+ min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
+ sk->sk_sndbuf))
return -EAGAIN;
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
int oldpcount = tcp_skb_pcount(skb);
if (unlikely(oldpcount > 1)) {
+ if (skb_unclone(skb, GFP_ATOMIC))
+ return -ENOMEM;
tcp_init_tso_segs(sk, skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
}
}
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
*/
void tcp_send_fin(struct sock *sk)
{
+ struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = tcp_write_queue_tail(sk);
- int mss_now;
- /* Optimization, tack on the FIN if we have a queue of
- * unsent frames. But be careful about outgoing SACKS
- * and IP options.
+ /* Optimization, tack on the FIN if we have one skb in write queue and
+ * this skb was not yet sent, or we are under memory pressure.
+ * Note: in the latter case, FIN packet will be sent after a timeout,
+ * as TCP stack thinks it has already been transmitted.
*/
- mss_now = tcp_current_mss(sk);
-
- if (tcp_send_head(sk) != NULL) {
- TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
- TCP_SKB_CB(skb)->end_seq++;
+ if (tskb && (tcp_send_head(sk) || tcp_memory_pressure)) {
+coalesce:
+ TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+ TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
+ if (!tcp_send_head(sk)) {
+ /* This means tskb was already sent.
+ * Pretend we included the FIN on previous transmit.
+ * We need to set tp->snd_nxt to the value it would have
+ * if FIN had been sent. This is because retransmit path
+ * does not change tp->snd_nxt.
+ */
+ tp->snd_nxt++;
+ return;
+ }
} else {
- /* Socket is locked, keep trying until memory is available. */
- for (;;) {
- skb = alloc_skb_fclone(MAX_TCP_HEADER,
- sk->sk_allocation);
- if (skb)
- break;
- yield();
+ skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ if (unlikely(!skb)) {
+ if (tskb)
+ goto coalesce;
+ return;
}
/* Reserve space for headers and prepare control bits. */
TCPHDR_ACK | TCPHDR_FIN);
tcp_queue_skb(sk, skb);
}
- __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
}
/* We get here when a process closes a file descriptor (either due to
tcp_connect_init(sk);
- buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
- if (unlikely(buff == NULL))
+ buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ if (unlikely(!buff))
return -ENOBUFS;
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_TCP_HEADER);
-
tp->snd_nxt = tp->write_seq;
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
TCP_ECN_send_syn(sk, buff);