/*
* This implementation should follow RFC 4341
- *
- * BUGS:
- * - sequence number wrapping
*/
#include "../ccid.h"
pipe++;
/* packets are sent sequentially */
- BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
+ BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
+ prev->ccid2s_seq ) >= 0);
BUG_ON(time_before(seqp->ccid2s_sent,
prev->ccid2s_sent));
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
- struct ccid2_hc_tx_sock *hctx;
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
- switch (DCCP_SKB_CB(skb)->dccpd_type) {
- case 0: /* XXX data packets from userland come through like this */
- case DCCP_PKT_DATA:
- case DCCP_PKT_DATAACK:
- break;
- /* No congestion control on other packets */
- default:
+ if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
return 0;
- }
-
- hctx = ccid2_hc_tx_sk(sk);
-
- ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
- hctx->ccid2hctx_cwnd);
-
- if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
- /* OK we can send... make sure previous packet was sent off */
- if (!hctx->ccid2hctx_sendwait) {
- hctx->ccid2hctx_sendwait = 1;
- return 0;
- }
- }
return 1; /* XXX CCID should dequeue when ready instead of polling */
}
-static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
+static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
struct dccp_sock *dp = dccp_sk(sk);
+ u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
+
/*
- * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio
- * should be 1... it shouldn't be allowed to become 2.
- * -sorbo.
+ * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
+ * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
+ * acceptable since this causes starvation/deadlock whenever cwnd < 2.
+ * The same problem arises when Ack Ratio is 0 (i.e. Ack Ratio disabled).
*/
- if (val != 2) {
- const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
- int max = hctx->ccid2hctx_cwnd / 2;
-
- /* round up */
- if (hctx->ccid2hctx_cwnd & 1)
- max++;
-
- if (val > max)
- val = max;
+ if (val == 0 || val > max_ratio) {
+ DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
+ val = max_ratio;
}
+ if (val > 0xFFFF) /* RFC 4340, 11.3 */
+ val = 0xFFFF;
- ccid2_pr_debug("changing local ack ratio to %d\n", val);
- WARN_ON(val <= 0);
- dp->dccps_l_ack_ratio = val;
-}
-
-static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
-{
- if (val == 0)
- val = 1;
-
- /* XXX do we need to change ack ratio? */
- ccid2_pr_debug("change cwnd to %d\n", val);
+ if (val == dp->dccps_l_ack_ratio)
+ return;
- BUG_ON(val < 1);
- hctx->ccid2hctx_cwnd = val;
+ ccid2_pr_debug("changing local ack ratio to %u\n", val);
+ dp->dccps_l_ack_ratio = val;
}
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
hctx->ccid2hctx_srtt = val;
}
-static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
-{
- hctx->ccid2hctx_pipe = val;
-}
-
static void ccid2_start_rto_timer(struct sock *sk);
static void ccid2_hc_tx_rto_expire(unsigned long data)
ccid2_start_rto_timer(sk);
/* adjust pipe, cwnd etc */
- ccid2_change_pipe(hctx, 0);
- hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
+ hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
if (hctx->ccid2hctx_ssthresh < 2)
hctx->ccid2hctx_ssthresh = 2;
- ccid2_change_cwnd(hctx, 1);
+ hctx->ccid2hctx_cwnd = 1;
+ hctx->ccid2hctx_pipe = 0;
/* clear state about stuff we sent */
- hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
- hctx->ccid2hctx_ssacks = 0;
- hctx->ccid2hctx_acks = 0;
- hctx->ccid2hctx_sent = 0;
+ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
+ hctx->ccid2hctx_packets_acked = 0;
/* clear ack ratio state. */
- hctx->ccid2hctx_arsent = 0;
- hctx->ccid2hctx_ackloss = 0;
hctx->ccid2hctx_rpseq = 0;
hctx->ccid2hctx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
struct ccid2_seq *next;
- u64 seq;
-
- ccid2_hc_tx_check_sanity(hctx);
-
- BUG_ON(!hctx->ccid2hctx_sendwait);
- hctx->ccid2hctx_sendwait = 0;
- ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
- BUG_ON(hctx->ccid2hctx_pipe < 0);
- /* There is an issue. What if another packet is sent between
- * packet_send() and packet_sent(). Then the sequence number would be
- * wrong.
- * -sorbo.
- */
- seq = dp->dccps_gss;
+ hctx->ccid2hctx_pipe++;
- hctx->ccid2hctx_seqh->ccid2s_seq = seq;
+ hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
hctx->ccid2hctx_pipe);
- hctx->ccid2hctx_sent++;
-
+ /*
+ * FIXME: The code below is broken and the variables have been removed
+ * from the socket struct. The `ackloss' variable was always set to 0,
+ * and with arsent there are several problems:
+ * (i) it doesn't just count the number of Acks, but all sent packets;
+ * (ii) it is expressed in # of packets, not # of windows, so the
+ * comparison below uses the wrong formula: Appendix A of RFC 4341
+ * comes up with the number K = cwnd / (R^2 - R) of consecutive windows
+ * of data with no lost or marked Ack packets. If arsent were the # of
+ * consecutive Acks received without loss, then Ack Ratio needs to be
+ * decreased by 1 when
+ * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
+ * where cwnd / R is the number of Acks received per window of data
+ * (cf. RFC 4341, App. A). The problems are that
+ * - arsent counts other packets as well;
+ * - the comparison uses a formula different from RFC 4341;
+ * - computing a cubic/quadratic equation each time is too complicated.
+ * Hence a different algorithm is needed.
+ */
+#if 0
/* Ack Ratio. Need to maintain a concept of how many windows we sent */
hctx->ccid2hctx_arsent++;
/* We had an ack loss in this window... */
hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
}
}
+#endif
/* setup RTO timer */
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
ccid2_start_rto_timer(sk);
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
- ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
- ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
do {
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
- /* slow start */
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
- hctx->ccid2hctx_acks = 0;
-
- /* We can increase cwnd at most maxincr [ack_ratio/2] */
- if (*maxincr) {
- /* increase every 2 acks */
- hctx->ccid2hctx_ssacks++;
- if (hctx->ccid2hctx_ssacks == 2) {
- ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
- hctx->ccid2hctx_ssacks = 0;
- *maxincr = *maxincr - 1;
- }
- } else {
- /* increased cwnd enough for this single ack */
- hctx->ccid2hctx_ssacks = 0;
- }
- } else {
- hctx->ccid2hctx_ssacks = 0;
- hctx->ccid2hctx_acks++;
-
- if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
- ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
- hctx->ccid2hctx_acks = 0;
+ if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
+ hctx->ccid2hctx_cwnd += 1;
+ *maxincr -= 1;
+ hctx->ccid2hctx_packets_acked = 0;
}
+ } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
+ hctx->ccid2hctx_cwnd += 1;
+ hctx->ccid2hctx_packets_acked = 0;
}
/* update RTO */
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
hctx->ccid2hctx_rto, HZ, r);
- hctx->ccid2hctx_sent = 0;
}
/* we got a new ack, so re-start RTO timer */
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
- ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
- BUG_ON(hctx->ccid2hctx_pipe < 0);
+ if (hctx->ccid2hctx_pipe == 0)
+ DCCP_BUG("pipe == 0");
+ else
+ hctx->ccid2hctx_pipe--;
if (hctx->ccid2hctx_pipe == 0)
ccid2_hc_tx_kill_rto_timer(sk);
}
-static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
- struct ccid2_seq *seqp)
+static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
return;
hctx->ccid2hctx_last_cong = jiffies;
- ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
- hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
- if (hctx->ccid2hctx_ssthresh < 2)
- hctx->ccid2hctx_ssthresh = 2;
+ hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
+ hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
+
+ /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
+ if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
+ ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
}
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hctx->ccid2hctx_rpseq = seqno;
} else {
/* check if packet is consecutive */
- if ((hctx->ccid2hctx_rpseq + 1) == seqno)
- hctx->ccid2hctx_rpseq++;
+ if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
+ hctx->ccid2hctx_rpseq = seqno;
/* it's a later packet */
else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
hctx->ccid2hctx_rpdupack++;
/* check if we got enough dupacks */
- if (hctx->ccid2hctx_rpdupack >=
- hctx->ccid2hctx_numdupack) {
+ if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
hctx->ccid2hctx_rpseq = 0;
- ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1);
+ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
}
}
}
}
}
- /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
- * this single ack. I round up.
- * -sorbo.
+ /*
+ * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
+ * packets per acknowledgement. Rounding up ensures that cwnd can still
+ * advance when Ack Ratio is 1 and gives a slight edge otherwise.
*/
- maxincr = dp->dccps_l_ack_ratio >> 1;
- maxincr++;
+ if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
+ maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
/* go through all ack vectors */
while ((offset = ccid2_ackvector(sk, skb, offset,
/* go through this ack vector */
while (veclen--) {
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
- u64 ackno_end_rl;
+ u64 ackno_end_rl = SUB48(ackno, rl);
- dccp_set_seqno(&ackno_end_rl, ackno - rl);
ccid2_pr_debug("ackvec start:%llu end:%llu\n",
(unsigned long long)ackno,
(unsigned long long)ackno_end_rl);
!seqp->ccid2s_acked) {
if (state ==
DCCP_ACKVEC_STATE_ECN_MARKED) {
- ccid2_congestion_event(hctx,
+ ccid2_congestion_event(sk,
seqp);
} else
ccid2_new_ack(sk, seqp,
done = 1;
break;
}
- seqp = seqp->ccid2s_next;
+ seqp = seqp->ccid2s_prev;
}
if (done)
break;
-
- dccp_set_seqno(&ackno, ackno_end_rl - 1);
+ ackno = SUB48(ackno_end_rl, 1);
vector++;
}
if (done)
while (1) {
if (seqp->ccid2s_acked) {
done++;
- if (done == hctx->ccid2hctx_numdupack)
+ if (done == NUMDUPACK)
break;
}
if (seqp == hctx->ccid2hctx_seqt)
/* If there are at least 3 acknowledgements, anything unacknowledged
* below the last sequence number is considered lost
*/
- if (done == hctx->ccid2hctx_numdupack) {
+ if (done == NUMDUPACK) {
struct ccid2_seq *last_acked = seqp;
/* check for lost packets */
* order to detect multiple congestion events in
* one ack vector.
*/
- ccid2_congestion_event(hctx, seqp);
+ ccid2_congestion_event(sk, seqp);
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->ccid2hctx_seqt)
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
+ struct dccp_sock *dp = dccp_sk(sk);
+ u32 max_ratio;
+
+ /* RFC 4341, 5: initialise ssthresh to an arbitrarily high (max) value */
+ hctx->ccid2hctx_ssthresh = ~0U;
- ccid2_change_cwnd(hctx, 1);
- /* Initialize ssthresh to infinity. This means that we will exit the
- * initial slow-start after the first packet loss. This is what we
- * want.
+ /*
+ * RFC 4341, 5: "The cwnd parameter is initialized to at most four
+ * packets for new connections, following the rules from [RFC3390]".
+ * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
*/
- hctx->ccid2hctx_ssthresh = ~0;
- hctx->ccid2hctx_numdupack = 3;
- hctx->ccid2hctx_seqbufc = 0;
+ hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+
+ /* Make sure that Ack Ratio is enabled and within bounds. */
+ max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
+ if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
+ dp->dccps_l_ack_ratio = max_ratio;
/* XXX init ~ to window size... */
if (ccid2_hc_tx_alloc_seq(hctx))
return -ENOMEM;
- hctx->ccid2hctx_sent = 0;
hctx->ccid2hctx_rto = 3 * HZ;
ccid2_change_srtt(hctx, -1);
hctx->ccid2hctx_rttvar = -1;
- hctx->ccid2hctx_lastrtt = 0;
hctx->ccid2hctx_rpdupack = -1;
hctx->ccid2hctx_last_cong = jiffies;
- hctx->ccid2hctx_high_ack = 0;
-
- hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
- hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
- init_timer(&hctx->ccid2hctx_rtotimer);
+ setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
+ (unsigned long)sk);
ccid2_hc_tx_check_sanity(hctx);
return 0;
static struct ccid_operations ccid2 = {
.ccid_id = DCCPC_CCID2,
- .ccid_name = "ccid2",
+ .ccid_name = "TCP-like",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,