net/ipv4/tcp_cong.c

   1 /*
   2  * Pluggable TCP congestion control support and newReno
   3  * congestion control.
   4  * Based on ideas from I/O scheduler support and Web100.
   5  *
   6  * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
   7  */
   8
   9 #include <linux/config.h>
  10 #include <linux/module.h>
  11 #include <linux/mm.h>
  12 #include <linux/types.h>
  13 #include <linux/list.h>
  14 #include <net/tcp.h>
  15
  /*
   * List of registered congestion control algorithms.  The entry at the
   * front is the one reported as the current default.  Code in this file
   * takes tcp_cong_list_lock around every modification of the list and
   * walks it with the RCU list iterator.
   */
  static LIST_HEAD(tcp_cong_list);
  static DEFINE_SPINLOCK(tcp_cong_list_lock);
  18
  19 /* Simple linear search, don't expect many entries! */
  20 static struct tcp_congestion_ops *tcp_ca_find(const char *name)
  21 {
  22         struct tcp_congestion_ops *e;
  23
  24         list_for_each_entry_rcu(e, &tcp_cong_list, list) {
  25                 if (strcmp(e->name, name) == 0)
  26                         return e;
  27         }
  28
  29         return NULL;
  30 }
  31
  32 /*
  33  * Attach new congestion control algorthim to the list
  34  * of available options.
  35  */
  36 int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
  37 {
  38         int ret = 0;
  39
  40         /* all algorithms must implement ssthresh and cong_avoid ops */
  41         if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) {
  42                 printk(KERN_ERR "TCP %s does not implement required ops\n",
  43                        ca->name);
  44                 return -EINVAL;
  45         }
  46
  47         spin_lock(&tcp_cong_list_lock);
  48         if (tcp_ca_find(ca->name)) {
  49                 printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
  50                 ret = -EEXIST;
  51         } else {
  52                 list_add_rcu(&ca->list, &tcp_cong_list);
  53                 printk(KERN_INFO "TCP %s registered\n", ca->name);
  54         }
  55         spin_unlock(&tcp_cong_list_lock);
  56
  57         return ret;
  58 }
  59 EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
  60
  61 /*
  62  * Remove congestion control algorithm, called from
  63  * the module's remove function.  Module ref counts are used
  64  * to ensure that this can't be done till all sockets using
  65  * that method are closed.
  66  */
  67 void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
  68 {
  69         spin_lock(&tcp_cong_list_lock);
  70         list_del_rcu(&ca->list);
  71         spin_unlock(&tcp_cong_list_lock);
  72 }
  73 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
  74
  75 /* Assign choice of congestion control. */
  76 void tcp_init_congestion_control(struct tcp_sock *tp)
  77 {
  78         struct tcp_congestion_ops *ca;
  79
  80         if (tp->ca_ops != &tcp_init_congestion_ops)
  81                 return;
  82
  83         rcu_read_lock();
  84         list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  85                 if (try_module_get(ca->owner)) {
  86                         tp->ca_ops = ca;
  87                         break;
  88                 }
  89
  90         }
  91         rcu_read_unlock();
  92
  93         if (tp->ca_ops->init)
  94                 tp->ca_ops->init(tp);
  95 }
  96
  97 /* Manage refcounts on socket close. */
  98 void tcp_cleanup_congestion_control(struct tcp_sock *tp)
  99 {
 100         if (tp->ca_ops->release)
 101                 tp->ca_ops->release(tp);
 102         module_put(tp->ca_ops->owner);
 103 }
 104
 105 /* Used by sysctl to change default congestion control */
 106 int tcp_set_default_congestion_control(const char *name)
 107 {
 108         struct tcp_congestion_ops *ca;
 109         int ret = -ENOENT;
 110
 111         spin_lock(&tcp_cong_list_lock);
 112         ca = tcp_ca_find(name);
 113 #ifdef CONFIG_KMOD
 114         if (!ca) {
 115                 spin_unlock(&tcp_cong_list_lock);
 116
 117                 request_module("tcp_%s", name);
 118                 spin_lock(&tcp_cong_list_lock);
 119                 ca = tcp_ca_find(name);
 120         }
 121 #endif
 122
 123         if (ca) {
 124                 list_move(&ca->list, &tcp_cong_list);
 125                 ret = 0;
 126         }
 127         spin_unlock(&tcp_cong_list_lock);
 128
 129         return ret;
 130 }
 131
 132 /* Get current default congestion control */
 133 void tcp_get_default_congestion_control(char *name)
 134 {
 135         struct tcp_congestion_ops *ca;
 136         /* We will always have reno... */
 137         BUG_ON(list_empty(&tcp_cong_list));
 138
 139         rcu_read_lock();
 140         ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
 141         strncpy(name, ca->name, TCP_CA_NAME_MAX);
 142         rcu_read_unlock();
 143 }
 144
 145 /* Change congestion control for socket */
 146 int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
 147 {
 148         struct tcp_congestion_ops *ca;
 149         int err = 0;
 150
 151         rcu_read_lock();
 152         ca = tcp_ca_find(name);
 153         if (ca == tp->ca_ops)
 154                 goto out;
 155
 156         if (!ca)
 157                 err = -ENOENT;
 158
 159         else if (!try_module_get(ca->owner))
 160                 err = -EBUSY;
 161
 162         else {
 163                 tcp_cleanup_congestion_control(tp);
 164                 tp->ca_ops = ca;
 165                 if (tp->ca_ops->init)
 166                         tp->ca_ops->init(tp);
 167         }
 168  out:
 169         rcu_read_unlock();
 170         return err;
 171 }
 172
 173 /*
 174  * TCP Reno congestion control
 175  * This is special case used for fallback as well.
 176  */
 177 /* This is Jacobson's slow start and congestion avoidance.
 178  * SIGCOMM '88, p. 328.
 179  */
 180 void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
 181                          int flag)
 182 {
 183         if (in_flight < tp->snd_cwnd)
 184                 return;
 185
 186         if (tp->snd_cwnd <= tp->snd_ssthresh) {
 187                 /* In "safe" area, increase. */
 188                 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
 189                         tp->snd_cwnd++;
 190         } else {
 191                 /* In dangerous area, increase slowly.
 192                  * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
 193                  */
 194                 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
 195                         if (tp->snd_cwnd < tp->snd_cwnd_clamp)
 196                                 tp->snd_cwnd++;
 197                         tp->snd_cwnd_cnt = 0;
 198                 } else
 199                         tp->snd_cwnd_cnt++;
 200         }
 201 }
 202 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 203
 204 /* Slow start threshold is half the congestion window (min 2) */
 205 u32 tcp_reno_ssthresh(struct tcp_sock *tp)
 206 {
 207         return max(tp->snd_cwnd >> 1U, 2U);
 208 }
 209 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 210
 211 /* Lower bound on congestion window. */
 212 u32 tcp_reno_min_cwnd(struct tcp_sock *tp)
 213 {
 214         return tp->snd_ssthresh/2;
 215 }
 216 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
 217
 218 struct tcp_congestion_ops tcp_reno = {
 219         .name           = "reno",
 220         .owner          = THIS_MODULE,
 221         .ssthresh       = tcp_reno_ssthresh,
 222         .cong_avoid     = tcp_reno_cong_avoid,
 223         .min_cwnd       = tcp_reno_min_cwnd,
 224 };
 225
 226 /* Initial congestion control used (until SYN)
 227  * really reno under another name so we can tell difference
 228  * during tcp_set_default_congestion_control
 229  */
 230 struct tcp_congestion_ops tcp_init_congestion_ops  = {
 231         .name           = "",
 232         .owner          = THIS_MODULE,
 233         .ssthresh       = tcp_reno_ssthresh,
 234         .cong_avoid     = tcp_reno_cong_avoid,
 235         .min_cwnd       = tcp_reno_min_cwnd,
 236 };
 237 EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);