[XFRM]: Add sorting interface for state and template.
[pandora-kernel.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
/* Exported socket pointer.
 * NOTE(review): presumably the xfrm netlink socket; it is not used in the
 * visible part of this file - confirm against the netlink code.
 */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Exported tunable, initialised to XFRM_AE_ETIME. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Exported tunable, initialised to XFRM_AE_SEQT_SIZE. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);

/* Each xfrm_state may be linked to three tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by source address (xfrm_state_bysrc).

   Every table link holds its own reference on the state.
 */

/* Protects the three hash tables below. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

/* Wait queue woken whenever the state tables change (insert, flush,
 * SPI allocation, garbage collection, ACQ expiry).
 */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Per-family operations, indexed by protocol family.
 * NOTE(review): xfrm_state_afinfo_lock presumably guards this array; the
 * accessors are defined outside the visible part of the file.
 */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are queued on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and freed by xfrm_state_gc_work.
 */
static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Set by __xfrm_state_delete() to make the GC work flush cached bundles. */
static int xfrm_state_gc_flush_bundles;

/* Forward declarations for functions defined later in this file or elsewhere. */
int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
70
71 static void xfrm_state_gc_destroy(struct xfrm_state *x)
72 {
73         if (del_timer(&x->timer))
74                 BUG();
75         if (del_timer(&x->rtimer))
76                 BUG();
77         kfree(x->aalg);
78         kfree(x->ealg);
79         kfree(x->calg);
80         kfree(x->encap);
81         kfree(x->coaddr);
82         if (x->mode)
83                 xfrm_put_mode(x->mode);
84         if (x->type) {
85                 x->type->destructor(x);
86                 xfrm_put_type(x->type);
87         }
88         security_xfrm_state_free(x);
89         kfree(x);
90 }
91
92 static void xfrm_state_gc_task(void *data)
93 {
94         struct xfrm_state *x;
95         struct list_head *entry, *tmp;
96         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
97
98         if (xfrm_state_gc_flush_bundles) {
99                 xfrm_state_gc_flush_bundles = 0;
100                 xfrm_flush_bundles();
101         }
102
103         spin_lock_bh(&xfrm_state_gc_lock);
104         list_splice_init(&xfrm_state_gc_list, &gc_list);
105         spin_unlock_bh(&xfrm_state_gc_lock);
106
107         list_for_each_safe(entry, tmp, &gc_list) {
108                 x = list_entry(entry, struct xfrm_state, bydst);
109                 xfrm_state_gc_destroy(x);
110         }
111         wake_up(&km_waitq);
112 }
113
114 static inline unsigned long make_jiffies(long secs)
115 {
116         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
117                 return MAX_SCHEDULE_TIMEOUT-1;
118         else
119                 return secs*HZ;
120 }
121
122 static void xfrm_timer_handler(unsigned long data)
123 {
124         struct xfrm_state *x = (struct xfrm_state*)data;
125         unsigned long now = (unsigned long)xtime.tv_sec;
126         long next = LONG_MAX;
127         int warn = 0;
128
129         spin_lock(&x->lock);
130         if (x->km.state == XFRM_STATE_DEAD)
131                 goto out;
132         if (x->km.state == XFRM_STATE_EXPIRED)
133                 goto expired;
134         if (x->lft.hard_add_expires_seconds) {
135                 long tmo = x->lft.hard_add_expires_seconds +
136                         x->curlft.add_time - now;
137                 if (tmo <= 0)
138                         goto expired;
139                 if (tmo < next)
140                         next = tmo;
141         }
142         if (x->lft.hard_use_expires_seconds) {
143                 long tmo = x->lft.hard_use_expires_seconds +
144                         (x->curlft.use_time ? : now) - now;
145                 if (tmo <= 0)
146                         goto expired;
147                 if (tmo < next)
148                         next = tmo;
149         }
150         if (x->km.dying)
151                 goto resched;
152         if (x->lft.soft_add_expires_seconds) {
153                 long tmo = x->lft.soft_add_expires_seconds +
154                         x->curlft.add_time - now;
155                 if (tmo <= 0)
156                         warn = 1;
157                 else if (tmo < next)
158                         next = tmo;
159         }
160         if (x->lft.soft_use_expires_seconds) {
161                 long tmo = x->lft.soft_use_expires_seconds +
162                         (x->curlft.use_time ? : now) - now;
163                 if (tmo <= 0)
164                         warn = 1;
165                 else if (tmo < next)
166                         next = tmo;
167         }
168
169         x->km.dying = warn;
170         if (warn)
171                 km_state_expired(x, 0, 0);
172 resched:
173         if (next != LONG_MAX &&
174             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
175                 xfrm_state_hold(x);
176         goto out;
177
178 expired:
179         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
180                 x->km.state = XFRM_STATE_EXPIRED;
181                 wake_up(&km_waitq);
182                 next = 2;
183                 goto resched;
184         }
185         if (!__xfrm_state_delete(x) && x->id.spi)
186                 km_state_expired(x, 1, 0);
187
188 out:
189         spin_unlock(&x->lock);
190         xfrm_state_put(x);
191 }
192
193 static void xfrm_replay_timer_handler(unsigned long data);
194
195 struct xfrm_state *xfrm_state_alloc(void)
196 {
197         struct xfrm_state *x;
198
199         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
200
201         if (x) {
202                 atomic_set(&x->refcnt, 1);
203                 atomic_set(&x->tunnel_users, 0);
204                 INIT_LIST_HEAD(&x->bydst);
205                 INIT_LIST_HEAD(&x->bysrc);
206                 INIT_LIST_HEAD(&x->byspi);
207                 init_timer(&x->timer);
208                 x->timer.function = xfrm_timer_handler;
209                 x->timer.data     = (unsigned long)x;
210                 init_timer(&x->rtimer);
211                 x->rtimer.function = xfrm_replay_timer_handler;
212                 x->rtimer.data     = (unsigned long)x;
213                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
214                 x->lft.soft_byte_limit = XFRM_INF;
215                 x->lft.soft_packet_limit = XFRM_INF;
216                 x->lft.hard_byte_limit = XFRM_INF;
217                 x->lft.hard_packet_limit = XFRM_INF;
218                 x->replay_maxage = 0;
219                 x->replay_maxdiff = 0;
220                 spin_lock_init(&x->lock);
221         }
222         return x;
223 }
224 EXPORT_SYMBOL(xfrm_state_alloc);
225
226 void __xfrm_state_destroy(struct xfrm_state *x)
227 {
228         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
229
230         spin_lock_bh(&xfrm_state_gc_lock);
231         list_add(&x->bydst, &xfrm_state_gc_list);
232         spin_unlock_bh(&xfrm_state_gc_lock);
233         schedule_work(&xfrm_state_gc_work);
234 }
235 EXPORT_SYMBOL(__xfrm_state_destroy);
236
237 int __xfrm_state_delete(struct xfrm_state *x)
238 {
239         int err = -ESRCH;
240
241         if (x->km.state != XFRM_STATE_DEAD) {
242                 x->km.state = XFRM_STATE_DEAD;
243                 spin_lock(&xfrm_state_lock);
244                 list_del(&x->bydst);
245                 __xfrm_state_put(x);
246                 list_del(&x->bysrc);
247                 __xfrm_state_put(x);
248                 if (x->id.spi) {
249                         list_del(&x->byspi);
250                         __xfrm_state_put(x);
251                 }
252                 spin_unlock(&xfrm_state_lock);
253                 if (del_timer(&x->timer))
254                         __xfrm_state_put(x);
255                 if (del_timer(&x->rtimer))
256                         __xfrm_state_put(x);
257
258                 /* The number two in this test is the reference
259                  * mentioned in the comment below plus the reference
260                  * our caller holds.  A larger value means that
261                  * there are DSTs attached to this xfrm_state.
262                  */
263                 if (atomic_read(&x->refcnt) > 2) {
264                         xfrm_state_gc_flush_bundles = 1;
265                         schedule_work(&xfrm_state_gc_work);
266                 }
267
268                 /* All xfrm_state objects are created by xfrm_state_alloc.
269                  * The xfrm_state_alloc call gives a reference, and that
270                  * is what we are dropping here.
271                  */
272                 __xfrm_state_put(x);
273                 err = 0;
274         }
275
276         return err;
277 }
278 EXPORT_SYMBOL(__xfrm_state_delete);
279
280 int xfrm_state_delete(struct xfrm_state *x)
281 {
282         int err;
283
284         spin_lock_bh(&x->lock);
285         err = __xfrm_state_delete(x);
286         spin_unlock_bh(&x->lock);
287
288         return err;
289 }
290 EXPORT_SYMBOL(xfrm_state_delete);
291
292 void xfrm_state_flush(u8 proto)
293 {
294         int i;
295         struct xfrm_state *x;
296
297         spin_lock_bh(&xfrm_state_lock);
298         for (i = 0; i < XFRM_DST_HSIZE; i++) {
299 restart:
300                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
301                         if (!xfrm_state_kern(x) &&
302                             xfrm_id_proto_match(x->id.proto, proto)) {
303                                 xfrm_state_hold(x);
304                                 spin_unlock_bh(&xfrm_state_lock);
305
306                                 xfrm_state_delete(x);
307                                 xfrm_state_put(x);
308
309                                 spin_lock_bh(&xfrm_state_lock);
310                                 goto restart;
311                         }
312                 }
313         }
314         spin_unlock_bh(&xfrm_state_lock);
315         wake_up(&km_waitq);
316 }
317 EXPORT_SYMBOL(xfrm_state_flush);
318
319 static int
320 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
321                   struct xfrm_tmpl *tmpl,
322                   xfrm_address_t *daddr, xfrm_address_t *saddr,
323                   unsigned short family)
324 {
325         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
326         if (!afinfo)
327                 return -1;
328         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
329         xfrm_state_put_afinfo(afinfo);
330         return 0;
331 }
332
333 struct xfrm_state *
334 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
335                 struct flowi *fl, struct xfrm_tmpl *tmpl,
336                 struct xfrm_policy *pol, int *err,
337                 unsigned short family)
338 {
339         unsigned h = xfrm_dst_hash(daddr, family);
340         struct xfrm_state *x, *x0;
341         int acquire_in_progress = 0;
342         int error = 0;
343         struct xfrm_state *best = NULL;
344         struct xfrm_state_afinfo *afinfo;
345         
346         afinfo = xfrm_state_get_afinfo(family);
347         if (afinfo == NULL) {
348                 *err = -EAFNOSUPPORT;
349                 return NULL;
350         }
351
352         spin_lock_bh(&xfrm_state_lock);
353         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
354                 if (x->props.family == family &&
355                     x->props.reqid == tmpl->reqid &&
356                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
357                     xfrm_state_addr_check(x, daddr, saddr, family) &&
358                     tmpl->mode == x->props.mode &&
359                     tmpl->id.proto == x->id.proto &&
360                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
361                         /* Resolution logic:
362                            1. There is a valid state with matching selector.
363                               Done.
364                            2. Valid state with inappropriate selector. Skip.
365
366                            Entering area of "sysdeps".
367
368                            3. If state is not valid, selector is temporary,
369                               it selects only session which triggered
370                               previous resolution. Key manager will do
371                               something to install a state with proper
372                               selector.
373                          */
374                         if (x->km.state == XFRM_STATE_VALID) {
375                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
376                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
377                                         continue;
378                                 if (!best ||
379                                     best->km.dying > x->km.dying ||
380                                     (best->km.dying == x->km.dying &&
381                                      best->curlft.add_time < x->curlft.add_time))
382                                         best = x;
383                         } else if (x->km.state == XFRM_STATE_ACQ) {
384                                 acquire_in_progress = 1;
385                         } else if (x->km.state == XFRM_STATE_ERROR ||
386                                    x->km.state == XFRM_STATE_EXPIRED) {
387                                 if (xfrm_selector_match(&x->sel, fl, family) &&
388                                     security_xfrm_state_pol_flow_match(x, pol, fl))
389                                         error = -ESRCH;
390                         }
391                 }
392         }
393
394         x = best;
395         if (!x && !error && !acquire_in_progress) {
396                 if (tmpl->id.spi &&
397                     (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
398                                                tmpl->id.proto)) != NULL) {
399                         xfrm_state_put(x0);
400                         error = -EEXIST;
401                         goto out;
402                 }
403                 x = xfrm_state_alloc();
404                 if (x == NULL) {
405                         error = -ENOMEM;
406                         goto out;
407                 }
408                 /* Initialize temporary selector matching only
409                  * to current session. */
410                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
411
412                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
413                 if (error) {
414                         x->km.state = XFRM_STATE_DEAD;
415                         xfrm_state_put(x);
416                         x = NULL;
417                         goto out;
418                 }
419
420                 if (km_query(x, tmpl, pol) == 0) {
421                         x->km.state = XFRM_STATE_ACQ;
422                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
423                         xfrm_state_hold(x);
424                         list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
425                         xfrm_state_hold(x);
426                         if (x->id.spi) {
427                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
428                                 list_add(&x->byspi, xfrm_state_byspi+h);
429                                 xfrm_state_hold(x);
430                         }
431                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
432                         xfrm_state_hold(x);
433                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
434                         add_timer(&x->timer);
435                 } else {
436                         x->km.state = XFRM_STATE_DEAD;
437                         xfrm_state_put(x);
438                         x = NULL;
439                         error = -ESRCH;
440                 }
441         }
442 out:
443         if (x)
444                 xfrm_state_hold(x);
445         else
446                 *err = acquire_in_progress ? -EAGAIN : error;
447         spin_unlock_bh(&xfrm_state_lock);
448         xfrm_state_put_afinfo(afinfo);
449         return x;
450 }
451
452 static void __xfrm_state_insert(struct xfrm_state *x)
453 {
454         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
455
456         list_add(&x->bydst, xfrm_state_bydst+h);
457         xfrm_state_hold(x);
458
459         h = xfrm_src_hash(&x->props.saddr, x->props.family);
460
461         list_add(&x->bysrc, xfrm_state_bysrc+h);
462         xfrm_state_hold(x);
463
464         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
465                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
466                                   x->props.family);
467
468                 list_add(&x->byspi, xfrm_state_byspi+h);
469                 xfrm_state_hold(x);
470         }
471
472         if (!mod_timer(&x->timer, jiffies + HZ))
473                 xfrm_state_hold(x);
474
475         if (x->replay_maxage &&
476             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
477                 xfrm_state_hold(x);
478
479         wake_up(&km_waitq);
480 }
481
482 void xfrm_state_insert(struct xfrm_state *x)
483 {
484         spin_lock_bh(&xfrm_state_lock);
485         __xfrm_state_insert(x);
486         spin_unlock_bh(&xfrm_state_lock);
487
488         xfrm_flush_all_bundles();
489 }
490 EXPORT_SYMBOL(xfrm_state_insert);
491
492 static inline struct xfrm_state *
493 __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
494                     int use_spi)
495 {
496         if (use_spi)
497                 return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
498         else
499                 return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto);
500 }
501
502 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
503
504 int xfrm_state_add(struct xfrm_state *x)
505 {
506         struct xfrm_state_afinfo *afinfo;
507         struct xfrm_state *x1;
508         int family;
509         int err;
510         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
511
512         family = x->props.family;
513         afinfo = xfrm_state_get_afinfo(family);
514         if (unlikely(afinfo == NULL))
515                 return -EAFNOSUPPORT;
516
517         spin_lock_bh(&xfrm_state_lock);
518
519         x1 = __xfrm_state_locate(afinfo, x, use_spi);
520         if (x1) {
521                 xfrm_state_put(x1);
522                 x1 = NULL;
523                 err = -EEXIST;
524                 goto out;
525         }
526
527         if (use_spi && x->km.seq) {
528                 x1 = __xfrm_find_acq_byseq(x->km.seq);
529                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
530                         xfrm_state_put(x1);
531                         x1 = NULL;
532                 }
533         }
534
535         if (use_spi && !x1)
536                 x1 = afinfo->find_acq(
537                         x->props.mode, x->props.reqid, x->id.proto,
538                         &x->id.daddr, &x->props.saddr, 0);
539
540         __xfrm_state_insert(x);
541         err = 0;
542
543 out:
544         spin_unlock_bh(&xfrm_state_lock);
545         xfrm_state_put_afinfo(afinfo);
546
547         if (!err)
548                 xfrm_flush_all_bundles();
549
550         if (x1) {
551                 xfrm_state_delete(x1);
552                 xfrm_state_put(x1);
553         }
554
555         return err;
556 }
557 EXPORT_SYMBOL(xfrm_state_add);
558
559 int xfrm_state_update(struct xfrm_state *x)
560 {
561         struct xfrm_state_afinfo *afinfo;
562         struct xfrm_state *x1;
563         int err;
564         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
565
566         afinfo = xfrm_state_get_afinfo(x->props.family);
567         if (unlikely(afinfo == NULL))
568                 return -EAFNOSUPPORT;
569
570         spin_lock_bh(&xfrm_state_lock);
571         x1 = __xfrm_state_locate(afinfo, x, use_spi);
572
573         err = -ESRCH;
574         if (!x1)
575                 goto out;
576
577         if (xfrm_state_kern(x1)) {
578                 xfrm_state_put(x1);
579                 err = -EEXIST;
580                 goto out;
581         }
582
583         if (x1->km.state == XFRM_STATE_ACQ) {
584                 __xfrm_state_insert(x);
585                 x = NULL;
586         }
587         err = 0;
588
589 out:
590         spin_unlock_bh(&xfrm_state_lock);
591         xfrm_state_put_afinfo(afinfo);
592
593         if (err)
594                 return err;
595
596         if (!x) {
597                 xfrm_state_delete(x1);
598                 xfrm_state_put(x1);
599                 return 0;
600         }
601
602         err = -EINVAL;
603         spin_lock_bh(&x1->lock);
604         if (likely(x1->km.state == XFRM_STATE_VALID)) {
605                 if (x->encap && x1->encap)
606                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
607                 if (x->coaddr && x1->coaddr) {
608                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
609                 }
610                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
611                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
612                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
613                 x1->km.dying = 0;
614
615                 if (!mod_timer(&x1->timer, jiffies + HZ))
616                         xfrm_state_hold(x1);
617                 if (x1->curlft.use_time)
618                         xfrm_state_check_expire(x1);
619
620                 err = 0;
621         }
622         spin_unlock_bh(&x1->lock);
623
624         xfrm_state_put(x1);
625
626         return err;
627 }
628 EXPORT_SYMBOL(xfrm_state_update);
629
630 int xfrm_state_check_expire(struct xfrm_state *x)
631 {
632         if (!x->curlft.use_time)
633                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
634
635         if (x->km.state != XFRM_STATE_VALID)
636                 return -EINVAL;
637
638         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
639             x->curlft.packets >= x->lft.hard_packet_limit) {
640                 x->km.state = XFRM_STATE_EXPIRED;
641                 if (!mod_timer(&x->timer, jiffies))
642                         xfrm_state_hold(x);
643                 return -EINVAL;
644         }
645
646         if (!x->km.dying &&
647             (x->curlft.bytes >= x->lft.soft_byte_limit ||
648              x->curlft.packets >= x->lft.soft_packet_limit)) {
649                 x->km.dying = 1;
650                 km_state_expired(x, 0, 0);
651         }
652         return 0;
653 }
654 EXPORT_SYMBOL(xfrm_state_check_expire);
655
656 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
657 {
658         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
659                 - skb_headroom(skb);
660
661         if (nhead > 0)
662                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
663
664         /* Check tail too... */
665         return 0;
666 }
667
/* Combined check: lifetime limits first, then headroom in @skb.
 * Returns the first negative result, else the headroom check's result.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int ret = xfrm_state_check_expire(x);

	if (ret < 0)
		return ret;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
678
679 struct xfrm_state *
680 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
681                   unsigned short family)
682 {
683         struct xfrm_state *x;
684         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
685         if (!afinfo)
686                 return NULL;
687
688         spin_lock_bh(&xfrm_state_lock);
689         x = afinfo->state_lookup(daddr, spi, proto);
690         spin_unlock_bh(&xfrm_state_lock);
691         xfrm_state_put_afinfo(afinfo);
692         return x;
693 }
694 EXPORT_SYMBOL(xfrm_state_lookup);
695
696 struct xfrm_state *
697 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
698                          u8 proto, unsigned short family)
699 {
700         struct xfrm_state *x;
701         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
702         if (!afinfo)
703                 return NULL;
704
705         spin_lock_bh(&xfrm_state_lock);
706         x = afinfo->state_lookup_byaddr(daddr, saddr, proto);
707         spin_unlock_bh(&xfrm_state_lock);
708         xfrm_state_put_afinfo(afinfo);
709         return x;
710 }
711 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
712
713 struct xfrm_state *
714 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
715               xfrm_address_t *daddr, xfrm_address_t *saddr, 
716               int create, unsigned short family)
717 {
718         struct xfrm_state *x;
719         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
720         if (!afinfo)
721                 return NULL;
722
723         spin_lock_bh(&xfrm_state_lock);
724         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
725         spin_unlock_bh(&xfrm_state_lock);
726         xfrm_state_put_afinfo(afinfo);
727         return x;
728 }
729 EXPORT_SYMBOL(xfrm_find_acq);
730
731 #ifdef CONFIG_XFRM_SUB_POLICY
732 int
733 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
734                unsigned short family)
735 {
736         int err = 0;
737         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
738         if (!afinfo)
739                 return -EAFNOSUPPORT;
740
741         spin_lock_bh(&xfrm_state_lock);
742         if (afinfo->tmpl_sort)
743                 err = afinfo->tmpl_sort(dst, src, n);
744         spin_unlock_bh(&xfrm_state_lock);
745         xfrm_state_put_afinfo(afinfo);
746         return err;
747 }
748 EXPORT_SYMBOL(xfrm_tmpl_sort);
749
750 int
751 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
752                 unsigned short family)
753 {
754         int err = 0;
755         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
756         if (!afinfo)
757                 return -EAFNOSUPPORT;
758
759         spin_lock_bh(&xfrm_state_lock);
760         if (afinfo->state_sort)
761                 err = afinfo->state_sort(dst, src, n);
762         spin_unlock_bh(&xfrm_state_lock);
763         xfrm_state_put_afinfo(afinfo);
764         return err;
765 }
766 EXPORT_SYMBOL(xfrm_state_sort);
767 #endif
768
769 /* Silly enough, but I'm lazy to build resolution list */
770
771 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
772 {
773         int i;
774         struct xfrm_state *x;
775
776         for (i = 0; i < XFRM_DST_HSIZE; i++) {
777                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
778                         if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
779                                 xfrm_state_hold(x);
780                                 return x;
781                         }
782                 }
783         }
784         return NULL;
785 }
786
787 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
788 {
789         struct xfrm_state *x;
790
791         spin_lock_bh(&xfrm_state_lock);
792         x = __xfrm_find_acq_byseq(seq);
793         spin_unlock_bh(&xfrm_state_lock);
794         return x;
795 }
796 EXPORT_SYMBOL(xfrm_find_acq_byseq);
797
798 u32 xfrm_get_acqseq(void)
799 {
800         u32 res;
801         static u32 acqseq;
802         static DEFINE_SPINLOCK(acqseq_lock);
803
804         spin_lock_bh(&acqseq_lock);
805         res = (++acqseq ? : ++acqseq);
806         spin_unlock_bh(&acqseq_lock);
807         return res;
808 }
809 EXPORT_SYMBOL(xfrm_get_acqseq);
810
811 void
812 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
813 {
814         u32 h;
815         struct xfrm_state *x0;
816
817         if (x->id.spi)
818                 return;
819
820         if (minspi == maxspi) {
821                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
822                 if (x0) {
823                         xfrm_state_put(x0);
824                         return;
825                 }
826                 x->id.spi = minspi;
827         } else {
828                 u32 spi = 0;
829                 minspi = ntohl(minspi);
830                 maxspi = ntohl(maxspi);
831                 for (h=0; h<maxspi-minspi+1; h++) {
832                         spi = minspi + net_random()%(maxspi-minspi+1);
833                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
834                         if (x0 == NULL) {
835                                 x->id.spi = htonl(spi);
836                                 break;
837                         }
838                         xfrm_state_put(x0);
839                 }
840         }
841         if (x->id.spi) {
842                 spin_lock_bh(&xfrm_state_lock);
843                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
844                 list_add(&x->byspi, xfrm_state_byspi+h);
845                 xfrm_state_hold(x);
846                 spin_unlock_bh(&xfrm_state_lock);
847                 wake_up(&km_waitq);
848         }
849 }
850 EXPORT_SYMBOL(xfrm_alloc_spi);
851
852 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
853                     void *data)
854 {
855         int i;
856         struct xfrm_state *x;
857         int count = 0;
858         int err = 0;
859
860         spin_lock_bh(&xfrm_state_lock);
861         for (i = 0; i < XFRM_DST_HSIZE; i++) {
862                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
863                         if (xfrm_id_proto_match(x->id.proto, proto))
864                                 count++;
865                 }
866         }
867         if (count == 0) {
868                 err = -ENOENT;
869                 goto out;
870         }
871
872         for (i = 0; i < XFRM_DST_HSIZE; i++) {
873                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
874                         if (!xfrm_id_proto_match(x->id.proto, proto))
875                                 continue;
876                         err = func(x, --count, data);
877                         if (err)
878                                 goto out;
879                 }
880         }
881 out:
882         spin_unlock_bh(&xfrm_state_lock);
883         return err;
884 }
885 EXPORT_SYMBOL(xfrm_state_walk);
886
887
888 void xfrm_replay_notify(struct xfrm_state *x, int event)
889 {
890         struct km_event c;
891         /* we send notify messages in case
892          *  1. we updated on of the sequence numbers, and the seqno difference
893          *     is at least x->replay_maxdiff, in this case we also update the
894          *     timeout of our timer function
895          *  2. if x->replay_maxage has elapsed since last update,
896          *     and there were changes
897          *
898          *  The state structure must be locked!
899          */
900
901         switch (event) {
902         case XFRM_REPLAY_UPDATE:
903                 if (x->replay_maxdiff &&
904                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
905                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
906                         if (x->xflags & XFRM_TIME_DEFER)
907                                 event = XFRM_REPLAY_TIMEOUT;
908                         else
909                                 return;
910                 }
911
912                 break;
913
914         case XFRM_REPLAY_TIMEOUT:
915                 if ((x->replay.seq == x->preplay.seq) &&
916                     (x->replay.bitmap == x->preplay.bitmap) &&
917                     (x->replay.oseq == x->preplay.oseq)) {
918                         x->xflags |= XFRM_TIME_DEFER;
919                         return;
920                 }
921
922                 break;
923         }
924
925         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
926         c.event = XFRM_MSG_NEWAE;
927         c.data.aevent = event;
928         km_state_notify(x, &c);
929
930         if (x->replay_maxage &&
931             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
932                 xfrm_state_hold(x);
933                 x->xflags &= ~XFRM_TIME_DEFER;
934         }
935 }
936 EXPORT_SYMBOL(xfrm_replay_notify);
937
938 static void xfrm_replay_timer_handler(unsigned long data)
939 {
940         struct xfrm_state *x = (struct xfrm_state*)data;
941
942         spin_lock(&x->lock);
943
944         if (x->km.state == XFRM_STATE_VALID) {
945                 if (xfrm_aevent_is_on())
946                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
947                 else
948                         x->xflags |= XFRM_TIME_DEFER;
949         }
950
951         spin_unlock(&x->lock);
952         xfrm_state_put(x);
953 }
954
955 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
956 {
957         u32 diff;
958
959         seq = ntohl(seq);
960
961         if (unlikely(seq == 0))
962                 return -EINVAL;
963
964         if (likely(seq > x->replay.seq))
965                 return 0;
966
967         diff = x->replay.seq - seq;
968         if (diff >= x->props.replay_window) {
969                 x->stats.replay_window++;
970                 return -EINVAL;
971         }
972
973         if (x->replay.bitmap & (1U << diff)) {
974                 x->stats.replay++;
975                 return -EINVAL;
976         }
977         return 0;
978 }
979 EXPORT_SYMBOL(xfrm_replay_check);
980
981 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
982 {
983         u32 diff;
984
985         seq = ntohl(seq);
986
987         if (seq > x->replay.seq) {
988                 diff = seq - x->replay.seq;
989                 if (diff < x->props.replay_window)
990                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
991                 else
992                         x->replay.bitmap = 1;
993                 x->replay.seq = seq;
994         } else {
995                 diff = x->replay.seq - seq;
996                 x->replay.bitmap |= (1U << diff);
997         }
998
999         if (xfrm_aevent_is_on())
1000                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1001 }
1002 EXPORT_SYMBOL(xfrm_replay_advance);
1003
/* Registered key managers; every access below takes xfrm_km_lock. */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1006
1007 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1008 {
1009         struct xfrm_mgr *km;
1010
1011         read_lock(&xfrm_km_lock);
1012         list_for_each_entry(km, &xfrm_km_list, list)
1013                 if (km->notify_policy)
1014                         km->notify_policy(xp, dir, c);
1015         read_unlock(&xfrm_km_lock);
1016 }
1017
1018 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1019 {
1020         struct xfrm_mgr *km;
1021         read_lock(&xfrm_km_lock);
1022         list_for_each_entry(km, &xfrm_km_list, list)
1023                 if (km->notify)
1024                         km->notify(x, c);
1025         read_unlock(&xfrm_km_lock);
1026 }
1027
1028 EXPORT_SYMBOL(km_policy_notify);
1029 EXPORT_SYMBOL(km_state_notify);
1030
1031 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1032 {
1033         struct km_event c;
1034
1035         c.data.hard = hard;
1036         c.pid = pid;
1037         c.event = XFRM_MSG_EXPIRE;
1038         km_state_notify(x, &c);
1039
1040         if (hard)
1041                 wake_up(&km_waitq);
1042 }
1043
1044 EXPORT_SYMBOL(km_state_expired);
1045 /*
1046  * We send to all registered managers regardless of failure
1047  * We are happy with one success
1048 */
1049 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1050 {
1051         int err = -EINVAL, acqret;
1052         struct xfrm_mgr *km;
1053
1054         read_lock(&xfrm_km_lock);
1055         list_for_each_entry(km, &xfrm_km_list, list) {
1056                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1057                 if (!acqret)
1058                         err = acqret;
1059         }
1060         read_unlock(&xfrm_km_lock);
1061         return err;
1062 }
1063 EXPORT_SYMBOL(km_query);
1064
1065 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1066 {
1067         int err = -EINVAL;
1068         struct xfrm_mgr *km;
1069
1070         read_lock(&xfrm_km_lock);
1071         list_for_each_entry(km, &xfrm_km_list, list) {
1072                 if (km->new_mapping)
1073                         err = km->new_mapping(x, ipaddr, sport);
1074                 if (!err)
1075                         break;
1076         }
1077         read_unlock(&xfrm_km_lock);
1078         return err;
1079 }
1080 EXPORT_SYMBOL(km_new_mapping);
1081
1082 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1083 {
1084         struct km_event c;
1085
1086         c.data.hard = hard;
1087         c.pid = pid;
1088         c.event = XFRM_MSG_POLEXPIRE;
1089         km_policy_notify(pol, dir, &c);
1090
1091         if (hard)
1092                 wake_up(&km_waitq);
1093 }
1094 EXPORT_SYMBOL(km_policy_expired);
1095
1096 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1097 {
1098         int err = -EINVAL;
1099         int ret;
1100         struct xfrm_mgr *km;
1101
1102         read_lock(&xfrm_km_lock);
1103         list_for_each_entry(km, &xfrm_km_list, list) {
1104                 if (km->report) {
1105                         ret = km->report(proto, sel, addr);
1106                         if (!ret)
1107                                 err = ret;
1108                 }
1109         }
1110         read_unlock(&xfrm_km_lock);
1111         return err;
1112 }
1113 EXPORT_SYMBOL(km_report);
1114
1115 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1116 {
1117         int err;
1118         u8 *data;
1119         struct xfrm_mgr *km;
1120         struct xfrm_policy *pol = NULL;
1121
1122         if (optlen <= 0 || optlen > PAGE_SIZE)
1123                 return -EMSGSIZE;
1124
1125         data = kmalloc(optlen, GFP_KERNEL);
1126         if (!data)
1127                 return -ENOMEM;
1128
1129         err = -EFAULT;
1130         if (copy_from_user(data, optval, optlen))
1131                 goto out;
1132
1133         err = -EINVAL;
1134         read_lock(&xfrm_km_lock);
1135         list_for_each_entry(km, &xfrm_km_list, list) {
1136                 pol = km->compile_policy(sk, optname, data,
1137                                          optlen, &err);
1138                 if (err >= 0)
1139                         break;
1140         }
1141         read_unlock(&xfrm_km_lock);
1142
1143         if (err >= 0) {
1144                 xfrm_sk_policy_insert(sk, err, pol);
1145                 xfrm_pol_put(pol);
1146                 err = 0;
1147         }
1148
1149 out:
1150         kfree(data);
1151         return err;
1152 }
1153 EXPORT_SYMBOL(xfrm_user_policy);
1154
1155 int xfrm_register_km(struct xfrm_mgr *km)
1156 {
1157         write_lock_bh(&xfrm_km_lock);
1158         list_add_tail(&km->list, &xfrm_km_list);
1159         write_unlock_bh(&xfrm_km_lock);
1160         return 0;
1161 }
1162 EXPORT_SYMBOL(xfrm_register_km);
1163
1164 int xfrm_unregister_km(struct xfrm_mgr *km)
1165 {
1166         write_lock_bh(&xfrm_km_lock);
1167         list_del(&km->list);
1168         write_unlock_bh(&xfrm_km_lock);
1169         return 0;
1170 }
1171 EXPORT_SYMBOL(xfrm_unregister_km);
1172
1173 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1174 {
1175         int err = 0;
1176         if (unlikely(afinfo == NULL))
1177                 return -EINVAL;
1178         if (unlikely(afinfo->family >= NPROTO))
1179                 return -EAFNOSUPPORT;
1180         write_lock_bh(&xfrm_state_afinfo_lock);
1181         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1182                 err = -ENOBUFS;
1183         else {
1184                 afinfo->state_bydst = xfrm_state_bydst;
1185                 afinfo->state_bysrc = xfrm_state_bysrc;
1186                 afinfo->state_byspi = xfrm_state_byspi;
1187                 xfrm_state_afinfo[afinfo->family] = afinfo;
1188         }
1189         write_unlock_bh(&xfrm_state_afinfo_lock);
1190         return err;
1191 }
1192 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1193
1194 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1195 {
1196         int err = 0;
1197         if (unlikely(afinfo == NULL))
1198                 return -EINVAL;
1199         if (unlikely(afinfo->family >= NPROTO))
1200                 return -EAFNOSUPPORT;
1201         write_lock_bh(&xfrm_state_afinfo_lock);
1202         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1203                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1204                         err = -EINVAL;
1205                 else {
1206                         xfrm_state_afinfo[afinfo->family] = NULL;
1207                         afinfo->state_byspi = NULL;
1208                         afinfo->state_bysrc = NULL;
1209                         afinfo->state_bydst = NULL;
1210                 }
1211         }
1212         write_unlock_bh(&xfrm_state_afinfo_lock);
1213         return err;
1214 }
1215 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1216
1217 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1218 {
1219         struct xfrm_state_afinfo *afinfo;
1220         if (unlikely(family >= NPROTO))
1221                 return NULL;
1222         read_lock(&xfrm_state_afinfo_lock);
1223         afinfo = xfrm_state_afinfo[family];
1224         if (unlikely(!afinfo))
1225                 read_unlock(&xfrm_state_afinfo_lock);
1226         return afinfo;
1227 }
1228
1229 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1230 {
1231         read_unlock(&xfrm_state_afinfo_lock);
1232 }
1233
1234 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1235 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1236 {
1237         if (x->tunnel) {
1238                 struct xfrm_state *t = x->tunnel;
1239
1240                 if (atomic_read(&t->tunnel_users) == 2)
1241                         xfrm_state_delete(t);
1242                 atomic_dec(&t->tunnel_users);
1243                 xfrm_state_put(t);
1244                 x->tunnel = NULL;
1245         }
1246 }
1247 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1248
1249 /*
1250  * This function is NOT optimal.  For example, with ESP it will give an
1251  * MTU that's usually two bytes short of being optimal.  However, it will
1252  * usually give an answer that's a multiple of 4 provided the input is
1253  * also a multiple of 4.
1254  */
1255 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1256 {
1257         int res = mtu;
1258
1259         res -= x->props.header_len;
1260
1261         for (;;) {
1262                 int m = res;
1263
1264                 if (m < 68)
1265                         return 68;
1266
1267                 spin_lock_bh(&x->lock);
1268                 if (x->km.state == XFRM_STATE_VALID &&
1269                     x->type && x->type->get_max_size)
1270                         m = x->type->get_max_size(x, m);
1271                 else
1272                         m += x->props.header_len;
1273                 spin_unlock_bh(&x->lock);
1274
1275                 if (m <= mtu)
1276                         break;
1277                 res -= (m - mtu);
1278         }
1279
1280         return res;
1281 }
1282
1283 int xfrm_init_state(struct xfrm_state *x)
1284 {
1285         struct xfrm_state_afinfo *afinfo;
1286         int family = x->props.family;
1287         int err;
1288
1289         err = -EAFNOSUPPORT;
1290         afinfo = xfrm_state_get_afinfo(family);
1291         if (!afinfo)
1292                 goto error;
1293
1294         err = 0;
1295         if (afinfo->init_flags)
1296                 err = afinfo->init_flags(x);
1297
1298         xfrm_state_put_afinfo(afinfo);
1299
1300         if (err)
1301                 goto error;
1302
1303         err = -EPROTONOSUPPORT;
1304         x->type = xfrm_get_type(x->id.proto, family);
1305         if (x->type == NULL)
1306                 goto error;
1307
1308         err = x->type->init_state(x);
1309         if (err)
1310                 goto error;
1311
1312         x->mode = xfrm_get_mode(x->props.mode, family);
1313         if (x->mode == NULL)
1314                 goto error;
1315
1316         x->km.state = XFRM_STATE_VALID;
1317
1318 error:
1319         return err;
1320 }
1321
1322 EXPORT_SYMBOL(xfrm_init_state);
1323  
1324 void __init xfrm_state_init(void)
1325 {
1326         int i;
1327
1328         for (i=0; i<XFRM_DST_HSIZE; i++) {
1329                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1330                 INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
1331                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1332         }
1333         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1334 }
1335