8eaee499cad5501524de4310c5bce7fe52e86431
[pandora-kernel.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
/* Defaults for the replay async-event settings, initialised from
 * XFRM_AE_ETIME and XFRM_AE_SEQT_SIZE.  Presumably exposed through
 * sysctl, going by the names -- confirm against the sysctl table.
 */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

/* Taken around the insertions into, removals from and walks over the
 * two hash tables below that are done in this file.
 */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
/* SPI-keyed table; note that it is also sized with XFRM_DST_HSIZE. */
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

/* Woken whenever a state is inserted, expires, gets an SPI or is
 * garbage collected (see the wake_up() callers in this file).
 */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Per-address-family operations, indexed by family number. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: __xfrm_state_destroy() queues states on
 * xfrm_state_gc_list (linked through their ->bydst member) under
 * xfrm_state_gc_lock, and xfrm_state_gc_work frees them later.
 */
static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Set by __xfrm_state_delete() when extra references remain on a state;
 * consumed (and cleared) by xfrm_state_gc_task(), which then calls
 * xfrm_flush_bundles().
 */
static int xfrm_state_gc_flush_bundles;

/* Forward declarations for helpers defined further down in this file. */
static int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static void km_state_expired(struct xfrm_state *x, int hard);

62
/* Release everything owned by a dead state and free the state itself.
 *
 * Called from the GC work item for every state that was queued by
 * __xfrm_state_destroy().
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
        /* A timer that is still pending here is treated as a fatal bug:
         * both timers are expected to have been stopped already.
         */
        if (del_timer(&x->timer))
                BUG();
        if (del_timer(&x->rtimer))
                BUG();
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
        kfree(x->encap);
        if (x->type) {
                /* Let the transform type clean up first, then drop our
                 * hold on the type.
                 */
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
        security_xfrm_state_free(x);
        kfree(x);
}
80
/* Work-queue handler: optionally flush bundles, then destroy every
 * state currently queued on xfrm_state_gc_list.
 *
 * @data: unused.
 */
static void xfrm_state_gc_task(void *data)
{
        struct xfrm_state *x;
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);

        /* Flag is raised by __xfrm_state_delete() when a deleted state
         * still had extra references.
         */
        if (xfrm_state_gc_flush_bundles) {
                xfrm_state_gc_flush_bundles = 0;
                xfrm_flush_bundles();
        }

        /* Steal the whole pending list so destruction runs unlocked. */
        spin_lock_bh(&xfrm_state_gc_lock);
        list_splice_init(&xfrm_state_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        /* States are chained through ->bydst while on the GC list; the
         * _safe iterator is needed because each entry is freed.
         */
        list_for_each_safe(entry, tmp, &gc_list) {
                x = list_entry(entry, struct xfrm_state, bydst);
                xfrm_state_gc_destroy(x);
        }
        wake_up(&km_waitq);
}
102
103 static inline unsigned long make_jiffies(long secs)
104 {
105         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
106                 return MAX_SCHEDULE_TIMEOUT-1;
107         else
108                 return secs*HZ;
109 }
110
/* Lifetime timer callback for a state (x->timer).
 *
 * Evaluates the time-based hard and soft limits against the current
 * wall-clock second.  Depending on the outcome it expires/deletes the
 * state, notifies the key managers about a soft expiry, and/or re-arms
 * the timer for the nearest upcoming deadline.
 *
 * @data: the struct xfrm_state pointer, cast to unsigned long.
 *
 * Runs with x->lock held for the whole evaluation and always ends with
 * xfrm_state_put(x).
 */
static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;   /* seconds until next deadline; LONG_MAX = none */
        int warn = 0;           /* set when a soft limit has been reached */

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        /* Hard limits: reaching one expires the state immediately. */
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                /* A state that was never used (use_time == 0) counts
                 * from "now", i.e. the full interval remains.
                 */
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Soft expiry was already signalled; only wait for hard limits. */
        if (x->km.dying)
                goto resched;
        /* Soft limits: reaching one only raises a warning. */
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0);
resched:
        /* mod_timer() returning 0 means the timer was not pending, so a
         * fresh reference is taken on behalf of the newly armed timer.
         */
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
                xfrm_state_hold(x);
        goto out;

expired:
        /* An acquire placeholder without an SPI is not deleted right
         * away: mark it expired, wake waiters and come back in 2 seconds.
         */
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }
        /* Report a hard expiry only if we actually deleted the state
         * and it has an SPI.
         */
        if (!__xfrm_state_delete(x) && x->id.spi)
                km_state_expired(x, 1);

out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);
}
181
182 struct xfrm_state *xfrm_state_alloc(void)
183 {
184         struct xfrm_state *x;
185
186         x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
187
188         if (x) {
189                 memset(x, 0, sizeof(struct xfrm_state));
190                 atomic_set(&x->refcnt, 1);
191                 atomic_set(&x->tunnel_users, 0);
192                 INIT_LIST_HEAD(&x->bydst);
193                 INIT_LIST_HEAD(&x->byspi);
194                 init_timer(&x->timer);
195                 x->timer.function = xfrm_timer_handler;
196                 x->timer.data     = (unsigned long)x;
197                 init_timer(&x->rtimer);
198                 x->rtimer.function = xfrm_replay_timer_handler;
199                 x->rtimer.data     = (unsigned long)x;
200                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
201                 x->lft.soft_byte_limit = XFRM_INF;
202                 x->lft.soft_packet_limit = XFRM_INF;
203                 x->lft.hard_byte_limit = XFRM_INF;
204                 x->lft.hard_packet_limit = XFRM_INF;
205                 x->replay_maxage = 0;
206                 x->replay_maxdiff = 0;
207                 spin_lock_init(&x->lock);
208         }
209         return x;
210 }
211 EXPORT_SYMBOL(xfrm_state_alloc);
212
/* Queue a dead state for deferred destruction.
 *
 * The state is linked onto xfrm_state_gc_list through its ->bydst
 * member and the GC work item is scheduled; the actual freeing is done
 * by xfrm_state_gc_task().  The state is expected to be
 * XFRM_STATE_DEAD already (checked with BUG_TRAP).
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        list_add(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
223
224 static int __xfrm_state_delete(struct xfrm_state *x)
225 {
226         int err = -ESRCH;
227
228         if (x->km.state != XFRM_STATE_DEAD) {
229                 x->km.state = XFRM_STATE_DEAD;
230                 spin_lock(&xfrm_state_lock);
231                 list_del(&x->bydst);
232                 __xfrm_state_put(x);
233                 if (x->id.spi) {
234                         list_del(&x->byspi);
235                         __xfrm_state_put(x);
236                 }
237                 spin_unlock(&xfrm_state_lock);
238                 if (del_timer(&x->timer))
239                         __xfrm_state_put(x);
240                 if (del_timer(&x->rtimer))
241                         __xfrm_state_put(x);
242
243                 /* The number two in this test is the reference
244                  * mentioned in the comment below plus the reference
245                  * our caller holds.  A larger value means that
246                  * there are DSTs attached to this xfrm_state.
247                  */
248                 if (atomic_read(&x->refcnt) > 2) {
249                         xfrm_state_gc_flush_bundles = 1;
250                         schedule_work(&xfrm_state_gc_work);
251                 }
252
253                 /* All xfrm_state objects are created by xfrm_state_alloc.
254                  * The xfrm_state_alloc call gives a reference, and that
255                  * is what we are dropping here.
256                  */
257                 __xfrm_state_put(x);
258                 err = 0;
259         }
260
261         return err;
262 }
263
264 int xfrm_state_delete(struct xfrm_state *x)
265 {
266         int err;
267
268         spin_lock_bh(&x->lock);
269         err = __xfrm_state_delete(x);
270         spin_unlock_bh(&x->lock);
271
272         return err;
273 }
274 EXPORT_SYMBOL(xfrm_state_delete);
275
/* Delete every state of the given protocol.
 *
 * @proto: protocol to match, or IPSEC_PROTO_ANY for all protocols.
 *
 * States for which xfrm_state_kern() is true are skipped.
 */
void xfrm_state_flush(u8 proto)
{
        int i;
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
                list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_state_kern(x) &&
                            (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
                                /* Pin the state, then drop the table lock:
                                 * xfrm_state_delete() takes xfrm_state_lock
                                 * itself while unhashing.
                                 */
                                xfrm_state_hold(x);
                                spin_unlock_bh(&xfrm_state_lock);

                                xfrm_state_delete(x);
                                xfrm_state_put(x);

                                /* The bucket may have changed while the
                                 * lock was released, so rescan it from
                                 * the start.
                                 */
                                spin_lock_bh(&xfrm_state_lock);
                                goto restart;
                        }
                }
        }
        spin_unlock_bh(&xfrm_state_lock);
        wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
302
303 static int
304 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
305                   struct xfrm_tmpl *tmpl,
306                   xfrm_address_t *daddr, xfrm_address_t *saddr,
307                   unsigned short family)
308 {
309         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
310         if (!afinfo)
311                 return -1;
312         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
313         xfrm_state_put_afinfo(afinfo);
314         return 0;
315 }
316
/* Find the best state matching a template for the given flow, or start
 * an acquire if none exists.
 *
 * @daddr, @saddr: addresses the state must match.
 * @fl:     flow that is being resolved.
 * @tmpl:   template (reqid, mode, proto, optional SPI) to satisfy.
 * @pol:    policy the template belongs to; its security context is
 *          matched against the state's.
 * @err:    receives the error code; it is written ONLY when NULL is
 *          returned (or when the family is unsupported).
 * @family: address family.
 *
 * Returns a state with an extra reference held for the caller, or NULL.
 * With NULL, *err is -EAGAIN when an acquire is already in progress,
 * otherwise one of -EAFNOSUPPORT, -ESRCH, -EEXIST, -ENOMEM (or 0 when a
 * matching state exists only in a non-usable condition without error).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned h = xfrm_dst_hash(daddr, family);
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;
        struct xfrm_state_afinfo *afinfo;
        
        afinfo = xfrm_state_get_afinfo(family);
        if (afinfo == NULL) {
                *err = -EAFNOSUPPORT;
                return NULL;
        }

        spin_lock_bh(&xfrm_state_lock);
        list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !xfrm_sec_ctx_match(pol->security, x->security))
                                        continue;
                                /* Prefer a state that is not dying; among
                                 * equals, prefer the most recently added.
                                 */
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    xfrm_sec_ctx_match(pol->security, x->security))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        /* Nothing usable and nothing pending: create an ACQ placeholder
         * and ask the key managers to negotiate a real state.
         */
        if (!x && !error && !acquire_in_progress) {
                /* A template that pins an SPI cannot be satisfied if that
                 * SPI is already taken by some other state.
                 */
                if (tmpl->id.spi &&
                    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
                                               tmpl->id.proto)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        /* One reference per table membership ... */
                        list_add_tail(&x->bydst, xfrm_state_bydst+h);
                        xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                list_add(&x->byspi, xfrm_state_byspi+h);
                                xfrm_state_hold(x);
                        }
                        /* ... and one for the armed lifetime timer. */
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
                } else {
                        /* No key manager accepted the request: drop the
                         * placeholder again.
                         */
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        /* The returned state carries a reference for the caller. */
        if (x)
                xfrm_state_hold(x);
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
        return x;
}
424
/* Link a state into both hash tables and start its timers.
 *
 * Every caller in this file holds xfrm_state_lock.  A reference is
 * taken for each table membership and for each timer that was not
 * already pending.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
        unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

        list_add(&x->bydst, xfrm_state_bydst+h);
        xfrm_state_hold(x);

        h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);

        list_add(&x->byspi, xfrm_state_byspi+h);
        xfrm_state_hold(x);

        /* Kick the lifetime timer one second from now; it computes the
         * real deadlines when it runs.
         */
        if (!mod_timer(&x->timer, jiffies + HZ))
                xfrm_state_hold(x);

        /* The replay timer is armed only when a max age is configured. */
        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                xfrm_state_hold(x);

        wake_up(&km_waitq);
}
446
/* Insert a state into the hash tables under xfrm_state_lock, then flush
 * all bundles via xfrm_flush_all_bundles().
 */
void xfrm_state_insert(struct xfrm_state *x)
{
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);

        xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
456
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);

/* Add a new state, replacing any acquire placeholder it resolves.
 *
 * Returns 0 on success, -EAFNOSUPPORT if the family has no afinfo, or
 * -EEXIST if a state with the same (daddr, spi, proto) already exists.
 */
int xfrm_state_add(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        struct xfrm_state *x1;
        int family;
        int err;

        family = x->props.family;
        afinfo = xfrm_state_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);

        /* Refuse duplicates. */
        x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
        if (x1) {
                xfrm_state_put(x1);
                x1 = NULL;
                err = -EEXIST;
                goto out;
        }

        /* Look for the ACQ placeholder this state answers: first by
         * sequence number (ignored if its destination differs) ...
         */
        if (x->km.seq) {
                x1 = __xfrm_find_acq_byseq(x->km.seq);
                if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
                        xfrm_state_put(x1);
                        x1 = NULL;
                }
        }

        /* ... then by mode/reqid/proto/addresses, without creating one. */
        if (!x1)
                x1 = afinfo->find_acq(
                        x->props.mode, x->props.reqid, x->id.proto,
                        &x->id.daddr, &x->props.saddr, 0);

        __xfrm_state_insert(x);
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);

        if (!err)
                xfrm_flush_all_bundles();

        /* The placeholder, if any, is now obsolete: delete it and drop
         * the reference the lookup gave us.  Done outside the table lock
         * because xfrm_state_delete() takes it.
         */
        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
        }

        return err;
}
EXPORT_SYMBOL(xfrm_state_add);
512
/* Update an existing state identified by (daddr, spi, proto) of @x.
 *
 * If the existing state is an ACQ placeholder, @x is inserted and the
 * placeholder deleted.  Otherwise the encapsulation data and lifetime
 * configuration of @x are copied into the existing state, which must be
 * XFRM_STATE_VALID.
 *
 * Returns 0 on success, -EAFNOSUPPORT for an unknown family, -ESRCH if
 * no such state exists, -EEXIST if the existing state is a kernel state
 * (xfrm_state_kern), or -EINVAL if the existing state is not valid.
 */
int xfrm_state_update(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        struct xfrm_state *x1;
        int err;

        afinfo = xfrm_state_get_afinfo(x->props.family);
        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);
        x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

        err = -ESRCH;
        if (!x1)
                goto out;

        if (xfrm_state_kern(x1)) {
                xfrm_state_put(x1);
                err = -EEXIST;
                goto out;
        }

        /* Replacing a placeholder: insert @x now; x == NULL below signals
         * that x1 has to be deleted rather than updated.
         */
        if (x1->km.state == XFRM_STATE_ACQ) {
                __xfrm_state_insert(x);
                x = NULL;
        }
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);

        if (err)
                return err;

        if (!x) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
                return 0;
        }

        /* In-place update of a live state. */
        err = -EINVAL;
        spin_lock_bh(&x1->lock);
        if (likely(x1->km.state == XFRM_STATE_VALID)) {
                /* Encapsulation is copied only when both sides have it. */
                if (x->encap && x1->encap)
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;

                /* Re-run the lifetime timer soon so the new limits are
                 * evaluated; take a reference if it was not pending.
                 */
                if (!mod_timer(&x1->timer, jiffies + HZ))
                        xfrm_state_hold(x1);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);

                err = 0;
        }
        spin_unlock_bh(&x1->lock);

        /* Drop the reference obtained from state_lookup(). */
        xfrm_state_put(x1);

        return err;
}
EXPORT_SYMBOL(xfrm_state_update);
577
/* Check the byte/packet lifetime limits of a state.
 *
 * Stamps the first-use time if it is still unset.  When a hard limit
 * has been reached the state is moved to XFRM_STATE_EXPIRED and the
 * lifetime timer is fired immediately.  When a soft limit has been
 * reached for the first time the key managers are notified.
 *
 * Returns 0 if the state may be used, -EINVAL if it is not valid or has
 * just hit a hard limit.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
        if (!x->curlft.use_time)
                x->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (x->km.state != XFRM_STATE_VALID)
                return -EINVAL;

        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
                x->km.state = XFRM_STATE_EXPIRED;
                /* Let the timer handler finish the expiry; hold a
                 * reference for the timer if it was not pending.
                 */
                if (!mod_timer(&x->timer, jiffies))
                        xfrm_state_hold(x);
                return -EINVAL;
        }

        /* km.dying ensures the soft-expiry event is sent only once. */
        if (!x->km.dying &&
            (x->curlft.bytes >= x->lft.soft_byte_limit ||
             x->curlft.packets >= x->lft.soft_packet_limit)) {
                x->km.dying = 1;
                km_state_expired(x, 0);
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
603
604 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
605 {
606         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
607                 - skb_headroom(skb);
608
609         if (nhead > 0)
610                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
611
612         /* Check tail too... */
613         return 0;
614 }
615
/* Validate a state for use on @skb: first the lifetime limits, then the
 * available headroom.
 *
 * Returns a negative value from xfrm_state_check_expire() if that check
 * fails, otherwise whatever xfrm_state_check_space() returns.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int rc = xfrm_state_check_expire(x);

        if (rc < 0)
                return rc;

        return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
626
627 struct xfrm_state *
628 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
629                   unsigned short family)
630 {
631         struct xfrm_state *x;
632         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
633         if (!afinfo)
634                 return NULL;
635
636         spin_lock_bh(&xfrm_state_lock);
637         x = afinfo->state_lookup(daddr, spi, proto);
638         spin_unlock_bh(&xfrm_state_lock);
639         xfrm_state_put_afinfo(afinfo);
640         return x;
641 }
642 EXPORT_SYMBOL(xfrm_state_lookup);
643
644 struct xfrm_state *
645 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
646               xfrm_address_t *daddr, xfrm_address_t *saddr, 
647               int create, unsigned short family)
648 {
649         struct xfrm_state *x;
650         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
651         if (!afinfo)
652                 return NULL;
653
654         spin_lock_bh(&xfrm_state_lock);
655         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
656         spin_unlock_bh(&xfrm_state_lock);
657         xfrm_state_put_afinfo(afinfo);
658         return x;
659 }
660 EXPORT_SYMBOL(xfrm_find_acq);
661
662 /* Silly enough, but I'm lazy to build resolution list */
663
664 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
665 {
666         int i;
667         struct xfrm_state *x;
668
669         for (i = 0; i < XFRM_DST_HSIZE; i++) {
670                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
671                         if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
672                                 xfrm_state_hold(x);
673                                 return x;
674                         }
675                 }
676         }
677         return NULL;
678 }
679
680 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
681 {
682         struct xfrm_state *x;
683
684         spin_lock_bh(&xfrm_state_lock);
685         x = __xfrm_find_acq_byseq(seq);
686         spin_unlock_bh(&xfrm_state_lock);
687         return x;
688 }
689 EXPORT_SYMBOL(xfrm_find_acq_byseq);
690
691 u32 xfrm_get_acqseq(void)
692 {
693         u32 res;
694         static u32 acqseq;
695         static DEFINE_SPINLOCK(acqseq_lock);
696
697         spin_lock_bh(&acqseq_lock);
698         res = (++acqseq ? : ++acqseq);
699         spin_unlock_bh(&acqseq_lock);
700         return res;
701 }
702 EXPORT_SYMBOL(xfrm_get_acqseq);
703
/* Assign an unused SPI from [minspi, maxspi] to a state and hash the
 * state into the byspi table.
 *
 * @x:      state to receive the SPI; nothing is done if it has one.
 * @minspi: lower bound, network byte order (it is passed through
 *          ntohl() in the range case).
 * @maxspi: upper bound, network byte order.
 *
 * There is no return value: callers must inspect x->id.spi afterwards,
 * which stays 0 when no free SPI was found.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
        u32 h;
        struct xfrm_state *x0;

        if (x->id.spi)
                return;

        if (minspi == maxspi) {
                /* A single requested value: take it only if it is free. */
                x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
                if (x0) {
                        xfrm_state_put(x0);
                        return;
                }
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
                minspi = ntohl(minspi);
                maxspi = ntohl(maxspi);
                /* Probe random values in the range, at most as many
                 * times as the range has elements.  Picks can repeat, so
                 * a free SPI is not guaranteed to be found even if one
                 * exists.
                 */
                for (h=0; h<maxspi-minspi+1; h++) {
                        spi = minspi + net_random()%(maxspi-minspi+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
                                break;
                        }
                        xfrm_state_put(x0);
                }
        }
        /* NOTE(review): the lookups above and the insertion below take
         * xfrm_state_lock separately, so the SPI is not reserved in
         * between -- confirm that callers serialise allocation.
         */
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                list_add(&x->byspi, xfrm_state_byspi+h);
                xfrm_state_hold(x);
                spin_unlock_bh(&xfrm_state_lock);
                wake_up(&km_waitq);
        }
}
EXPORT_SYMBOL(xfrm_alloc_spi);
744
745 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
746                     void *data)
747 {
748         int i;
749         struct xfrm_state *x;
750         int count = 0;
751         int err = 0;
752
753         spin_lock_bh(&xfrm_state_lock);
754         for (i = 0; i < XFRM_DST_HSIZE; i++) {
755                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
756                         if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
757                                 count++;
758                 }
759         }
760         if (count == 0) {
761                 err = -ENOENT;
762                 goto out;
763         }
764
765         for (i = 0; i < XFRM_DST_HSIZE; i++) {
766                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
767                         if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
768                                 continue;
769                         err = func(x, --count, data);
770                         if (err)
771                                 goto out;
772                 }
773         }
774 out:
775         spin_unlock_bh(&xfrm_state_lock);
776         return err;
777 }
778 EXPORT_SYMBOL(xfrm_state_walk);
779
780
781 void xfrm_replay_notify(struct xfrm_state *x, int event)
782 {
783         struct km_event c;
784         /* we send notify messages in case
785          *  1. we updated on of the sequence numbers, and the seqno difference
786          *     is at least x->replay_maxdiff, in this case we also update the
787          *     timeout of our timer function
788          *  2. if x->replay_maxage has elapsed since last update,
789          *     and there were changes
790          *
791          *  The state structure must be locked!
792          */
793
794         switch (event) {
795         case XFRM_REPLAY_UPDATE:
796                 if (x->replay_maxdiff &&
797                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
798                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))
799                         return;
800
801                 break;
802
803         case XFRM_REPLAY_TIMEOUT:
804                 if ((x->replay.seq == x->preplay.seq) &&
805                     (x->replay.bitmap == x->preplay.bitmap) &&
806                     (x->replay.oseq == x->preplay.oseq))
807                         return;
808
809                 break;
810         }
811
812         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
813         c.event = XFRM_MSG_NEWAE;
814         c.data.aevent = event;
815         km_state_notify(x, &c);
816
817 resched:
818         if (x->replay_maxage &&
819             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
820                 xfrm_state_hold(x);
821
822 }
823
824 static void xfrm_replay_timer_handler(unsigned long data)
825 {
826         struct xfrm_state *x = (struct xfrm_state*)data;
827
828         spin_lock(&x->lock);
829
830         if (xfrm_aevent_is_on() && x->km.state == XFRM_STATE_VALID)
831                 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
832
833         spin_unlock(&x->lock);
834 }
835
836 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
837 {
838         u32 diff;
839
840         seq = ntohl(seq);
841
842         if (unlikely(seq == 0))
843                 return -EINVAL;
844
845         if (likely(seq > x->replay.seq))
846                 return 0;
847
848         diff = x->replay.seq - seq;
849         if (diff >= x->props.replay_window) {
850                 x->stats.replay_window++;
851                 return -EINVAL;
852         }
853
854         if (x->replay.bitmap & (1U << diff)) {
855                 x->stats.replay++;
856                 return -EINVAL;
857         }
858         return 0;
859 }
860 EXPORT_SYMBOL(xfrm_replay_check);
861
862 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
863 {
864         u32 diff;
865
866         seq = ntohl(seq);
867
868         if (seq > x->replay.seq) {
869                 diff = seq - x->replay.seq;
870                 if (diff < x->props.replay_window)
871                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
872                 else
873                         x->replay.bitmap = 1;
874                 x->replay.seq = seq;
875         } else {
876                 diff = x->replay.seq - seq;
877                 x->replay.bitmap |= (1U << diff);
878         }
879
880         if (xfrm_aevent_is_on())
881                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
882 }
883 EXPORT_SYMBOL(xfrm_replay_advance);
884
/* Registered key managers (struct xfrm_mgr, linked through ->list);
 * the km_* helpers below walk this list under the read side of
 * xfrm_km_lock.
 */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
887
888 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
889 {
890         struct xfrm_mgr *km;
891
892         read_lock(&xfrm_km_lock);
893         list_for_each_entry(km, &xfrm_km_list, list)
894                 if (km->notify_policy)
895                         km->notify_policy(xp, dir, c);
896         read_unlock(&xfrm_km_lock);
897 }
898
899 void km_state_notify(struct xfrm_state *x, struct km_event *c)
900 {
901         struct xfrm_mgr *km;
902         read_lock(&xfrm_km_lock);
903         list_for_each_entry(km, &xfrm_km_list, list)
904                 if (km->notify)
905                         km->notify(x, c);
906         read_unlock(&xfrm_km_lock);
907 }
908
909 EXPORT_SYMBOL(km_policy_notify);
910 EXPORT_SYMBOL(km_state_notify);
911
912 void km_state_expired(struct xfrm_state *x, int hard)
913 {
914         struct km_event c;
915
916         c.data.hard = hard;
917         c.event = XFRM_MSG_EXPIRE;
918         km_state_notify(x, &c);
919
920         if (hard)
921                 wake_up(&km_waitq);
922 }
923
924 /*
925  * We send to all registered managers regardless of failure
926  * We are happy with one success
927 */
928 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
929 {
930         int err = -EINVAL, acqret;
931         struct xfrm_mgr *km;
932
933         read_lock(&xfrm_km_lock);
934         list_for_each_entry(km, &xfrm_km_list, list) {
935                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
936                 if (!acqret)
937                         err = acqret;
938         }
939         read_unlock(&xfrm_km_lock);
940         return err;
941 }
942
943 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
944 {
945         int err = -EINVAL;
946         struct xfrm_mgr *km;
947
948         read_lock(&xfrm_km_lock);
949         list_for_each_entry(km, &xfrm_km_list, list) {
950                 if (km->new_mapping)
951                         err = km->new_mapping(x, ipaddr, sport);
952                 if (!err)
953                         break;
954         }
955         read_unlock(&xfrm_km_lock);
956         return err;
957 }
958 EXPORT_SYMBOL(km_new_mapping);
959
960 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
961 {
962         struct km_event c;
963
964         c.data.hard = hard;
965         c.event = XFRM_MSG_POLEXPIRE;
966         km_policy_notify(pol, dir, &c);
967
968         if (hard)
969                 wake_up(&km_waitq);
970 }
971
972 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
973 {
974         int err;
975         u8 *data;
976         struct xfrm_mgr *km;
977         struct xfrm_policy *pol = NULL;
978
979         if (optlen <= 0 || optlen > PAGE_SIZE)
980                 return -EMSGSIZE;
981
982         data = kmalloc(optlen, GFP_KERNEL);
983         if (!data)
984                 return -ENOMEM;
985
986         err = -EFAULT;
987         if (copy_from_user(data, optval, optlen))
988                 goto out;
989
990         err = -EINVAL;
991         read_lock(&xfrm_km_lock);
992         list_for_each_entry(km, &xfrm_km_list, list) {
993                 pol = km->compile_policy(sk->sk_family, optname, data,
994                                          optlen, &err);
995                 if (err >= 0)
996                         break;
997         }
998         read_unlock(&xfrm_km_lock);
999
1000         if (err >= 0) {
1001                 xfrm_sk_policy_insert(sk, err, pol);
1002                 xfrm_pol_put(pol);
1003                 err = 0;
1004         }
1005
1006 out:
1007         kfree(data);
1008         return err;
1009 }
1010 EXPORT_SYMBOL(xfrm_user_policy);
1011
1012 int xfrm_register_km(struct xfrm_mgr *km)
1013 {
1014         write_lock_bh(&xfrm_km_lock);
1015         list_add_tail(&km->list, &xfrm_km_list);
1016         write_unlock_bh(&xfrm_km_lock);
1017         return 0;
1018 }
1019 EXPORT_SYMBOL(xfrm_register_km);
1020
1021 int xfrm_unregister_km(struct xfrm_mgr *km)
1022 {
1023         write_lock_bh(&xfrm_km_lock);
1024         list_del(&km->list);
1025         write_unlock_bh(&xfrm_km_lock);
1026         return 0;
1027 }
1028 EXPORT_SYMBOL(xfrm_unregister_km);
1029
1030 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1031 {
1032         int err = 0;
1033         if (unlikely(afinfo == NULL))
1034                 return -EINVAL;
1035         if (unlikely(afinfo->family >= NPROTO))
1036                 return -EAFNOSUPPORT;
1037         write_lock(&xfrm_state_afinfo_lock);
1038         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1039                 err = -ENOBUFS;
1040         else {
1041                 afinfo->state_bydst = xfrm_state_bydst;
1042                 afinfo->state_byspi = xfrm_state_byspi;
1043                 xfrm_state_afinfo[afinfo->family] = afinfo;
1044         }
1045         write_unlock(&xfrm_state_afinfo_lock);
1046         return err;
1047 }
1048 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1049
1050 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1051 {
1052         int err = 0;
1053         if (unlikely(afinfo == NULL))
1054                 return -EINVAL;
1055         if (unlikely(afinfo->family >= NPROTO))
1056                 return -EAFNOSUPPORT;
1057         write_lock(&xfrm_state_afinfo_lock);
1058         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1059                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1060                         err = -EINVAL;
1061                 else {
1062                         xfrm_state_afinfo[afinfo->family] = NULL;
1063                         afinfo->state_byspi = NULL;
1064                         afinfo->state_bydst = NULL;
1065                 }
1066         }
1067         write_unlock(&xfrm_state_afinfo_lock);
1068         return err;
1069 }
1070 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1071
1072 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1073 {
1074         struct xfrm_state_afinfo *afinfo;
1075         if (unlikely(family >= NPROTO))
1076                 return NULL;
1077         read_lock(&xfrm_state_afinfo_lock);
1078         afinfo = xfrm_state_afinfo[family];
1079         if (likely(afinfo != NULL))
1080                 read_lock(&afinfo->lock);
1081         read_unlock(&xfrm_state_afinfo_lock);
1082         return afinfo;
1083 }
1084
1085 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1086 {
1087         if (unlikely(afinfo == NULL))
1088                 return;
1089         read_unlock(&afinfo->lock);
1090 }
1091
1092 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1093 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1094 {
1095         if (x->tunnel) {
1096                 struct xfrm_state *t = x->tunnel;
1097
1098                 if (atomic_read(&t->tunnel_users) == 2)
1099                         xfrm_state_delete(t);
1100                 atomic_dec(&t->tunnel_users);
1101                 xfrm_state_put(t);
1102                 x->tunnel = NULL;
1103         }
1104 }
1105 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1106
1107 /*
1108  * This function is NOT optimal.  For example, with ESP it will give an
1109  * MTU that's usually two bytes short of being optimal.  However, it will
1110  * usually give an answer that's a multiple of 4 provided the input is
1111  * also a multiple of 4.
1112  */
1113 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1114 {
1115         int res = mtu;
1116
1117         res -= x->props.header_len;
1118
1119         for (;;) {
1120                 int m = res;
1121
1122                 if (m < 68)
1123                         return 68;
1124
1125                 spin_lock_bh(&x->lock);
1126                 if (x->km.state == XFRM_STATE_VALID &&
1127                     x->type && x->type->get_max_size)
1128                         m = x->type->get_max_size(x, m);
1129                 else
1130                         m += x->props.header_len;
1131                 spin_unlock_bh(&x->lock);
1132
1133                 if (m <= mtu)
1134                         break;
1135                 res -= (m - mtu);
1136         }
1137
1138         return res;
1139 }
1140
1141 EXPORT_SYMBOL(xfrm_state_mtu);
1142
1143 int xfrm_init_state(struct xfrm_state *x)
1144 {
1145         struct xfrm_state_afinfo *afinfo;
1146         int family = x->props.family;
1147         int err;
1148
1149         err = -EAFNOSUPPORT;
1150         afinfo = xfrm_state_get_afinfo(family);
1151         if (!afinfo)
1152                 goto error;
1153
1154         err = 0;
1155         if (afinfo->init_flags)
1156                 err = afinfo->init_flags(x);
1157
1158         xfrm_state_put_afinfo(afinfo);
1159
1160         if (err)
1161                 goto error;
1162
1163         err = -EPROTONOSUPPORT;
1164         x->type = xfrm_get_type(x->id.proto, family);
1165         if (x->type == NULL)
1166                 goto error;
1167
1168         err = x->type->init_state(x);
1169         if (err)
1170                 goto error;
1171
1172         x->km.state = XFRM_STATE_VALID;
1173
1174 error:
1175         return err;
1176 }
1177
1178 EXPORT_SYMBOL(xfrm_init_state);
1179  
1180 void __init xfrm_state_init(void)
1181 {
1182         int i;
1183
1184         for (i=0; i<XFRM_DST_HSIZE; i++) {
1185                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1186                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1187         }
1188         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1189 }
1190