Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
[pandora-kernel.git] / net / sched / act_police.c
1 /*
2  * net/sched/act_police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <linux/module.h>
20 #include <linux/rtnetlink.h>
21 #include <linux/init.h>
22 #include <net/act_api.h>
23 #include <net/netlink.h>
24
/*
 * Length-to-table lookups.  The packet length is shifted right by the
 * table's rate.cell_log and the result indexes the table's data[] array.
 * L2T reads the rate table (tcfp_R_tab), L2T_P the peak-rate table
 * (tcfp_P_tab).  Neither macro checks the table pointer or the index, and
 * the policer argument is evaluated twice.
 */
#define L2T(pol, len) \
	((pol)->tcfp_R_tab->data[(len) >> (pol)->tcfp_R_tab->rate.cell_log])
#define L2T_P(pol, len) \
	((pol)->tcfp_P_tab->data[(len) >> (pol)->tcfp_P_tab->rate.cell_log])
27
28 #define POL_TAB_MASK     15
29 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
30 static u32 police_idx_gen;
31 static DEFINE_RWLOCK(police_lock);
32
33 static struct tcf_hashinfo police_hash_info = {
34         .htab   =       tcf_police_ht,
35         .hmask  =       POL_TAB_MASK,
36         .lock   =       &police_lock,
37 };
38
39 /* old policer structure from before tc actions */
40 struct tc_police_compat
41 {
42         u32                     index;
43         int                     action;
44         u32                     limit;
45         u32                     burst;
46         u32                     mtu;
47         struct tc_ratespec      rate;
48         struct tc_ratespec      peakrate;
49 };
50
51 /* Each policer is serialized by its individual spinlock */
52
53 #ifdef CONFIG_NET_CLS_ACT
54 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
55                               int type, struct tc_action *a)
56 {
57         struct tcf_common *p;
58         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
59         struct rtattr *r;
60
61         read_lock(&police_lock);
62
63         s_i = cb->args[0];
64
65         for (i = 0; i < (POL_TAB_MASK + 1); i++) {
66                 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
67
68                 for (; p; p = p->tcfc_next) {
69                         index++;
70                         if (index < s_i)
71                                 continue;
72                         a->priv = p;
73                         a->order = index;
74                         r = (struct rtattr *)skb_tail_pointer(skb);
75                         RTA_PUT(skb, a->order, 0, NULL);
76                         if (type == RTM_DELACTION)
77                                 err = tcf_action_dump_1(skb, a, 0, 1);
78                         else
79                                 err = tcf_action_dump_1(skb, a, 0, 0);
80                         if (err < 0) {
81                                 index--;
82                                 nlmsg_trim(skb, r);
83                                 goto done;
84                         }
85                         r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
86                         n_i++;
87                 }
88         }
89 done:
90         read_unlock(&police_lock);
91         if (n_i)
92                 cb->args[0] += n_i;
93         return n_i;
94
95 rtattr_failure:
96         nlmsg_trim(skb, r);
97         goto done;
98 }
99 #endif
100
101 void tcf_police_destroy(struct tcf_police *p)
102 {
103         unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
104         struct tcf_common **p1p;
105
106         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
107                 if (*p1p == &p->common) {
108                         write_lock_bh(&police_lock);
109                         *p1p = p->tcf_next;
110                         write_unlock_bh(&police_lock);
111                         gen_kill_estimator(&p->tcf_bstats,
112                                            &p->tcf_rate_est);
113                         if (p->tcfp_R_tab)
114                                 qdisc_put_rtab(p->tcfp_R_tab);
115                         if (p->tcfp_P_tab)
116                                 qdisc_put_rtab(p->tcfp_P_tab);
117                         kfree(p);
118                         return;
119                 }
120         }
121         BUG_TRAP(0);
122 }
123
124 #ifdef CONFIG_NET_CLS_ACT
125 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
126                                  struct tc_action *a, int ovr, int bind)
127 {
128         unsigned h;
129         int ret = 0, err;
130         struct rtattr *tb[TCA_POLICE_MAX];
131         struct tc_police *parm;
132         struct tcf_police *police;
133         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
134         int size;
135
136         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
137                 return -EINVAL;
138
139         if (tb[TCA_POLICE_TBF-1] == NULL)
140                 return -EINVAL;
141         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
142         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
143                 return -EINVAL;
144         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
145
146         if (tb[TCA_POLICE_RESULT-1] != NULL &&
147             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
148                 return -EINVAL;
149         if (tb[TCA_POLICE_RESULT-1] != NULL &&
150             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
151                 return -EINVAL;
152
153         if (parm->index) {
154                 struct tcf_common *pc;
155
156                 pc = tcf_hash_lookup(parm->index, &police_hash_info);
157                 if (pc != NULL) {
158                         a->priv = pc;
159                         police = to_police(pc);
160                         if (bind) {
161                                 police->tcf_bindcnt += 1;
162                                 police->tcf_refcnt += 1;
163                         }
164                         if (ovr)
165                                 goto override;
166                         return ret;
167                 }
168         }
169
170         police = kzalloc(sizeof(*police), GFP_KERNEL);
171         if (police == NULL)
172                 return -ENOMEM;
173         ret = ACT_P_CREATED;
174         police->tcf_refcnt = 1;
175         spin_lock_init(&police->tcf_lock);
176         if (bind)
177                 police->tcf_bindcnt = 1;
178 override:
179         if (parm->rate.rate) {
180                 err = -ENOMEM;
181                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
182                 if (R_tab == NULL)
183                         goto failure;
184                 if (parm->peakrate.rate) {
185                         P_tab = qdisc_get_rtab(&parm->peakrate,
186                                                tb[TCA_POLICE_PEAKRATE-1]);
187                         if (P_tab == NULL) {
188                                 qdisc_put_rtab(R_tab);
189                                 goto failure;
190                         }
191                 }
192         }
193         /* No failure allowed after this point */
194         spin_lock_bh(&police->tcf_lock);
195         if (R_tab != NULL) {
196                 qdisc_put_rtab(police->tcfp_R_tab);
197                 police->tcfp_R_tab = R_tab;
198         }
199         if (P_tab != NULL) {
200                 qdisc_put_rtab(police->tcfp_P_tab);
201                 police->tcfp_P_tab = P_tab;
202         }
203
204         if (tb[TCA_POLICE_RESULT-1])
205                 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
206         police->tcfp_toks = police->tcfp_burst = parm->burst;
207         police->tcfp_mtu = parm->mtu;
208         if (police->tcfp_mtu == 0) {
209                 police->tcfp_mtu = ~0;
210                 if (police->tcfp_R_tab)
211                         police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
212         }
213         if (police->tcfp_P_tab)
214                 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
215         police->tcf_action = parm->action;
216
217         if (tb[TCA_POLICE_AVRATE-1])
218                 police->tcfp_ewma_rate =
219                         *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
220         if (est)
221                 gen_replace_estimator(&police->tcf_bstats,
222                                       &police->tcf_rate_est,
223                                       &police->tcf_lock, est);
224
225         spin_unlock_bh(&police->tcf_lock);
226         if (ret != ACT_P_CREATED)
227                 return ret;
228
229         police->tcfp_t_c = psched_get_time();
230         police->tcf_index = parm->index ? parm->index :
231                 tcf_hash_new_index(&police_idx_gen, &police_hash_info);
232         h = tcf_hash(police->tcf_index, POL_TAB_MASK);
233         write_lock_bh(&police_lock);
234         police->tcf_next = tcf_police_ht[h];
235         tcf_police_ht[h] = &police->common;
236         write_unlock_bh(&police_lock);
237
238         a->priv = police;
239         return ret;
240
241 failure:
242         if (ret == ACT_P_CREATED)
243                 kfree(police);
244         return err;
245 }
246
247 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
248 {
249         struct tcf_police *p = a->priv;
250
251         if (p != NULL)
252                 return tcf_police_release(p, bind);
253         return 0;
254 }
255
256 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
257                           struct tcf_result *res)
258 {
259         struct tcf_police *police = a->priv;
260         psched_time_t now;
261         long toks;
262         long ptoks = 0;
263
264         spin_lock(&police->tcf_lock);
265
266         police->tcf_bstats.bytes += skb->len;
267         police->tcf_bstats.packets++;
268
269         if (police->tcfp_ewma_rate &&
270             police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
271                 police->tcf_qstats.overlimits++;
272                 spin_unlock(&police->tcf_lock);
273                 return police->tcf_action;
274         }
275
276         if (skb->len <= police->tcfp_mtu) {
277                 if (police->tcfp_R_tab == NULL) {
278                         spin_unlock(&police->tcf_lock);
279                         return police->tcfp_result;
280                 }
281
282                 now = psched_get_time();
283                 toks = psched_tdiff_bounded(now, police->tcfp_t_c,
284                                             police->tcfp_burst);
285                 if (police->tcfp_P_tab) {
286                         ptoks = toks + police->tcfp_ptoks;
287                         if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
288                                 ptoks = (long)L2T_P(police, police->tcfp_mtu);
289                         ptoks -= L2T_P(police, skb->len);
290                 }
291                 toks += police->tcfp_toks;
292                 if (toks > (long)police->tcfp_burst)
293                         toks = police->tcfp_burst;
294                 toks -= L2T(police, skb->len);
295                 if ((toks|ptoks) >= 0) {
296                         police->tcfp_t_c = now;
297                         police->tcfp_toks = toks;
298                         police->tcfp_ptoks = ptoks;
299                         spin_unlock(&police->tcf_lock);
300                         return police->tcfp_result;
301                 }
302         }
303
304         police->tcf_qstats.overlimits++;
305         spin_unlock(&police->tcf_lock);
306         return police->tcf_action;
307 }
308
309 static int
310 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
311 {
312         unsigned char *b = skb_tail_pointer(skb);
313         struct tcf_police *police = a->priv;
314         struct tc_police opt;
315
316         opt.index = police->tcf_index;
317         opt.action = police->tcf_action;
318         opt.mtu = police->tcfp_mtu;
319         opt.burst = police->tcfp_burst;
320         opt.refcnt = police->tcf_refcnt - ref;
321         opt.bindcnt = police->tcf_bindcnt - bind;
322         if (police->tcfp_R_tab)
323                 opt.rate = police->tcfp_R_tab->rate;
324         else
325                 memset(&opt.rate, 0, sizeof(opt.rate));
326         if (police->tcfp_P_tab)
327                 opt.peakrate = police->tcfp_P_tab->rate;
328         else
329                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
330         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
331         if (police->tcfp_result)
332                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
333                         &police->tcfp_result);
334         if (police->tcfp_ewma_rate)
335                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
336         return skb->len;
337
338 rtattr_failure:
339         nlmsg_trim(skb, b);
340         return -1;
341 }
342
343 MODULE_AUTHOR("Alexey Kuznetsov");
344 MODULE_DESCRIPTION("Policing actions");
345 MODULE_LICENSE("GPL");
346
347 static struct tc_action_ops act_police_ops = {
348         .kind           =       "police",
349         .hinfo          =       &police_hash_info,
350         .type           =       TCA_ID_POLICE,
351         .capab          =       TCA_CAP_NONE,
352         .owner          =       THIS_MODULE,
353         .act            =       tcf_act_police,
354         .dump           =       tcf_act_police_dump,
355         .cleanup        =       tcf_act_police_cleanup,
356         .lookup         =       tcf_hash_search,
357         .init           =       tcf_act_police_locate,
358         .walk           =       tcf_act_police_walker
359 };
360
361 static int __init
362 police_init_module(void)
363 {
364         return tcf_register_action(&act_police_ops);
365 }
366
367 static void __exit
368 police_cleanup_module(void)
369 {
370         tcf_unregister_action(&act_police_ops);
371 }
372
373 module_init(police_init_module);
374 module_exit(police_cleanup_module);
375
376 #else /* CONFIG_NET_CLS_ACT */
377
378 static struct tcf_common *tcf_police_lookup(u32 index)
379 {
380         struct tcf_hashinfo *hinfo = &police_hash_info;
381         struct tcf_common *p;
382
383         read_lock(hinfo->lock);
384         for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
385              p = p->tcfc_next) {
386                 if (p->tcfc_index == index)
387                         break;
388         }
389         read_unlock(hinfo->lock);
390
391         return p;
392 }
393
394 static u32 tcf_police_new_index(void)
395 {
396         u32 *idx_gen = &police_idx_gen;
397         u32 val = *idx_gen;
398
399         do {
400                 if (++val == 0)
401                         val = 1;
402         } while (tcf_police_lookup(val));
403
404         return (*idx_gen = val);
405 }
406
407 struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
408 {
409         unsigned int h;
410         struct tcf_police *police;
411         struct rtattr *tb[TCA_POLICE_MAX];
412         struct tc_police *parm;
413         int size;
414
415         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
416                 return NULL;
417
418         if (tb[TCA_POLICE_TBF-1] == NULL)
419                 return NULL;
420         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
421         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
422                 return NULL;
423
424         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
425
426         if (parm->index) {
427                 struct tcf_common *pc;
428
429                 pc = tcf_police_lookup(parm->index);
430                 if (pc) {
431                         police = to_police(pc);
432                         police->tcf_refcnt++;
433                         return police;
434                 }
435         }
436         police = kzalloc(sizeof(*police), GFP_KERNEL);
437         if (unlikely(!police))
438                 return NULL;
439
440         police->tcf_refcnt = 1;
441         spin_lock_init(&police->tcf_lock);
442         if (parm->rate.rate) {
443                 police->tcfp_R_tab =
444                         qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
445                 if (police->tcfp_R_tab == NULL)
446                         goto failure;
447                 if (parm->peakrate.rate) {
448                         police->tcfp_P_tab =
449                                 qdisc_get_rtab(&parm->peakrate,
450                                                tb[TCA_POLICE_PEAKRATE-1]);
451                         if (police->tcfp_P_tab == NULL)
452                                 goto failure;
453                 }
454         }
455         if (tb[TCA_POLICE_RESULT-1]) {
456                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
457                         goto failure;
458                 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
459         }
460         if (tb[TCA_POLICE_AVRATE-1]) {
461                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
462                         goto failure;
463                 police->tcfp_ewma_rate =
464                         *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
465         }
466         police->tcfp_toks = police->tcfp_burst = parm->burst;
467         police->tcfp_mtu = parm->mtu;
468         if (police->tcfp_mtu == 0) {
469                 police->tcfp_mtu = ~0;
470                 if (police->tcfp_R_tab)
471                         police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
472         }
473         if (police->tcfp_P_tab)
474                 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
475         police->tcfp_t_c = psched_get_time();
476         police->tcf_index = parm->index ? parm->index :
477                 tcf_police_new_index();
478         police->tcf_action = parm->action;
479         if (est)
480                 gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
481                                   &police->tcf_lock, est);
482         h = tcf_hash(police->tcf_index, POL_TAB_MASK);
483         write_lock_bh(&police_lock);
484         police->tcf_next = tcf_police_ht[h];
485         tcf_police_ht[h] = &police->common;
486         write_unlock_bh(&police_lock);
487         return police;
488
489 failure:
490         if (police->tcfp_R_tab)
491                 qdisc_put_rtab(police->tcfp_R_tab);
492         kfree(police);
493         return NULL;
494 }
495
496 int tcf_police(struct sk_buff *skb, struct tcf_police *police)
497 {
498         psched_time_t now;
499         long toks;
500         long ptoks = 0;
501
502         spin_lock(&police->tcf_lock);
503
504         police->tcf_bstats.bytes += skb->len;
505         police->tcf_bstats.packets++;
506
507         if (police->tcfp_ewma_rate &&
508             police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
509                 police->tcf_qstats.overlimits++;
510                 spin_unlock(&police->tcf_lock);
511                 return police->tcf_action;
512         }
513         if (skb->len <= police->tcfp_mtu) {
514                 if (police->tcfp_R_tab == NULL) {
515                         spin_unlock(&police->tcf_lock);
516                         return police->tcfp_result;
517                 }
518
519                 now = psched_get_time();
520                 toks = psched_tdiff_bounded(now, police->tcfp_t_c,
521                                             police->tcfp_burst);
522                 if (police->tcfp_P_tab) {
523                         ptoks = toks + police->tcfp_ptoks;
524                         if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
525                                 ptoks = (long)L2T_P(police, police->tcfp_mtu);
526                         ptoks -= L2T_P(police, skb->len);
527                 }
528                 toks += police->tcfp_toks;
529                 if (toks > (long)police->tcfp_burst)
530                         toks = police->tcfp_burst;
531                 toks -= L2T(police, skb->len);
532                 if ((toks|ptoks) >= 0) {
533                         police->tcfp_t_c = now;
534                         police->tcfp_toks = toks;
535                         police->tcfp_ptoks = ptoks;
536                         spin_unlock(&police->tcf_lock);
537                         return police->tcfp_result;
538                 }
539         }
540
541         police->tcf_qstats.overlimits++;
542         spin_unlock(&police->tcf_lock);
543         return police->tcf_action;
544 }
545 EXPORT_SYMBOL(tcf_police);
546
547 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
548 {
549         unsigned char *b = skb_tail_pointer(skb);
550         struct tc_police opt;
551
552         opt.index = police->tcf_index;
553         opt.action = police->tcf_action;
554         opt.mtu = police->tcfp_mtu;
555         opt.burst = police->tcfp_burst;
556         if (police->tcfp_R_tab)
557                 opt.rate = police->tcfp_R_tab->rate;
558         else
559                 memset(&opt.rate, 0, sizeof(opt.rate));
560         if (police->tcfp_P_tab)
561                 opt.peakrate = police->tcfp_P_tab->rate;
562         else
563                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
564         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
565         if (police->tcfp_result)
566                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
567                         &police->tcfp_result);
568         if (police->tcfp_ewma_rate)
569                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
570         return skb->len;
571
572 rtattr_failure:
573         nlmsg_trim(skb, b);
574         return -1;
575 }
576
577 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
578 {
579         struct gnet_dump d;
580
581         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
582                                          TCA_XSTATS, &police->tcf_lock,
583                                          &d) < 0)
584                 goto errout;
585
586         if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
587             gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
588             gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
589                 goto errout;
590
591         if (gnet_stats_finish_copy(&d) < 0)
592                 goto errout;
593
594         return 0;
595
596 errout:
597         return -1;
598 }
599
600 #endif /* CONFIG_NET_CLS_ACT */