Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
[pandora-kernel.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24 #include <linux/cache.h>
25
26 #include "xfrm_hash.h"
27
/* Socket handle exported to other modules.  NOTE(review): presumably the
 * xfrm netlink socket, judging by the name - confirm against its users. */
28 struct sock *xfrm_nl;
29 EXPORT_SYMBOL(xfrm_nl);
30
/* Exported tunable; defaults to XFRM_AE_ETIME. */
31 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
32 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
33
/* Exported tunable; defaults to XFRM_AE_SEQT_SIZE. */
34 u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
35 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
36
/* Lifetime, in seconds, given to ACQ-state entries created in this file:
 * it is copied into lft.hard_add_expires_seconds and multiplied by HZ to
 * arm the state timer. */
37 u32 sysctl_xfrm_acq_expires __read_mostly = 30;
38
39 /* Each xfrm_state may be linked to two tables:
40
41    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
42    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
43       destination/tunnel endpoint. (output)
44  */
45
/* Taken around every lookup in, and modification of, the state hash
 * tables and the counters declared below. */
46 static DEFINE_SPINLOCK(xfrm_state_lock);
47
48 /* Hash table to find appropriate SA towards given target (endpoint
49  * of tunnel or destination of transport mode) allowed by selector.
50  *
51  * Main use is finding SA after policy selected tunnel or transport mode.
52  * Also, it can be used by ah/esp icmp error handler to find offending SA.
53  */
/* Indexed by xfrm_dst_hash(daddr, saddr, reqid, family). */
54 static struct hlist_head *xfrm_state_bydst __read_mostly;
/* Indexed by xfrm_src_hash(daddr, saddr, family). */
55 static struct hlist_head *xfrm_state_bysrc __read_mostly;
/* Indexed by xfrm_spi_hash(daddr, spi, proto, family); only states with a
 * non-zero SPI are linked here. */
56 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Bucket count minus one, shared by all three tables. */
57 static unsigned int xfrm_state_hmask __read_mostly;
/* Bucket count at which xfrm_hash_grow_check() stops scheduling resizes. */
58 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of states currently linked into the tables. */
59 static unsigned int xfrm_state_num;
/* Incremented on each __xfrm_state_insert() and stamped into x->genid. */
60 static unsigned int xfrm_state_genid;
61
62 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
63                                          xfrm_address_t *saddr,
64                                          u32 reqid,
65                                          unsigned short family)
66 {
67         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
68 }
69
70 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
71                                          xfrm_address_t *saddr,
72                                          unsigned short family)
73 {
74         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
75 }
76
77 static inline unsigned int
78 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
79 {
80         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
81 }
82
83 static void xfrm_hash_transfer(struct hlist_head *list,
84                                struct hlist_head *ndsttable,
85                                struct hlist_head *nsrctable,
86                                struct hlist_head *nspitable,
87                                unsigned int nhashmask)
88 {
89         struct hlist_node *entry, *tmp;
90         struct xfrm_state *x;
91
92         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
93                 unsigned int h;
94
95                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
96                                     x->props.reqid, x->props.family,
97                                     nhashmask);
98                 hlist_add_head(&x->bydst, ndsttable+h);
99
100                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
101                                     x->props.family,
102                                     nhashmask);
103                 hlist_add_head(&x->bysrc, nsrctable+h);
104
105                 if (x->id.spi) {
106                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
107                                             x->id.proto, x->props.family,
108                                             nhashmask);
109                         hlist_add_head(&x->byspi, nspitable+h);
110                 }
111         }
112 }
113
114 static unsigned long xfrm_hash_new_size(void)
115 {
116         return ((xfrm_state_hmask + 1) << 1) *
117                 sizeof(struct hlist_head);
118 }
119
120 static DEFINE_MUTEX(hash_resize_mutex);
121
122 static void xfrm_hash_resize(struct work_struct *__unused)
123 {
124         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
125         unsigned long nsize, osize;
126         unsigned int nhashmask, ohashmask;
127         int i;
128
129         mutex_lock(&hash_resize_mutex);
130
131         nsize = xfrm_hash_new_size();
132         ndst = xfrm_hash_alloc(nsize);
133         if (!ndst)
134                 goto out_unlock;
135         nsrc = xfrm_hash_alloc(nsize);
136         if (!nsrc) {
137                 xfrm_hash_free(ndst, nsize);
138                 goto out_unlock;
139         }
140         nspi = xfrm_hash_alloc(nsize);
141         if (!nspi) {
142                 xfrm_hash_free(ndst, nsize);
143                 xfrm_hash_free(nsrc, nsize);
144                 goto out_unlock;
145         }
146
147         spin_lock_bh(&xfrm_state_lock);
148
149         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
150         for (i = xfrm_state_hmask; i >= 0; i--)
151                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
152                                    nhashmask);
153
154         odst = xfrm_state_bydst;
155         osrc = xfrm_state_bysrc;
156         ospi = xfrm_state_byspi;
157         ohashmask = xfrm_state_hmask;
158
159         xfrm_state_bydst = ndst;
160         xfrm_state_bysrc = nsrc;
161         xfrm_state_byspi = nspi;
162         xfrm_state_hmask = nhashmask;
163
164         spin_unlock_bh(&xfrm_state_lock);
165
166         osize = (ohashmask + 1) * sizeof(struct hlist_head);
167         xfrm_hash_free(odst, osize);
168         xfrm_hash_free(osrc, osize);
169         xfrm_hash_free(ospi, osize);
170
171 out_unlock:
172         mutex_unlock(&hash_resize_mutex);
173 }
174
/* Work item that runs xfrm_hash_resize(); queued by xfrm_hash_grow_check(). */
175 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
176
/* Wait queue woken from this file whenever states are inserted, expired,
 * flushed or garbage-collected. */
177 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
178 EXPORT_SYMBOL(km_waitq);
179
/* Per-address-family operations table and its lock.  NOTE(review): the
 * registration code is outside this view; presumably indexed by family. */
180 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
181 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
182
/* Deferred destruction: __xfrm_state_destroy() queues states on
 * xfrm_state_gc_list (under xfrm_state_gc_lock) and schedules
 * xfrm_state_gc_work; xfrm_state_gc_task() drains the list. */
183 static struct work_struct xfrm_state_gc_work;
184 static HLIST_HEAD(xfrm_state_gc_list);
185 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
186
/* Forward declarations for functions used before their definitions. */
187 int __xfrm_state_delete(struct xfrm_state *x);
188
189 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
192 static void xfrm_state_gc_destroy(struct xfrm_state *x)
193 {
194         del_timer_sync(&x->timer);
195         del_timer_sync(&x->rtimer);
196         kfree(x->aalg);
197         kfree(x->ealg);
198         kfree(x->calg);
199         kfree(x->encap);
200         kfree(x->coaddr);
201         if (x->mode)
202                 xfrm_put_mode(x->mode);
203         if (x->type) {
204                 x->type->destructor(x);
205                 xfrm_put_type(x->type);
206         }
207         security_xfrm_state_free(x);
208         kfree(x);
209 }
210
211 static void xfrm_state_gc_task(struct work_struct *data)
212 {
213         struct xfrm_state *x;
214         struct hlist_node *entry, *tmp;
215         struct hlist_head gc_list;
216
217         spin_lock_bh(&xfrm_state_gc_lock);
218         gc_list.first = xfrm_state_gc_list.first;
219         INIT_HLIST_HEAD(&xfrm_state_gc_list);
220         spin_unlock_bh(&xfrm_state_gc_lock);
221
222         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
223                 xfrm_state_gc_destroy(x);
224
225         wake_up(&km_waitq);
226 }
227
228 static inline unsigned long make_jiffies(long secs)
229 {
230         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231                 return MAX_SCHEDULE_TIMEOUT-1;
232         else
233                 return secs*HZ;
234 }
235
/*
 * Lifetime timer callback for one state (installed by xfrm_state_alloc();
 * @data is the struct xfrm_state pointer).  Runs with x->lock held.
 *
 *  - DEAD states are ignored.
 *  - A hard add/use limit that has passed, or a state already marked
 *    EXPIRED, goes to the "expired" path.
 *  - A soft limit that has passed sets km.dying and notifies the key
 *    manager with hard == 0.
 *  - Otherwise the timer is re-armed for the nearest pending limit.
 *
 * All limits are in seconds; "next" holds the smallest remaining time and
 * stays LONG_MAX when no limit is pending, in which case the timer is not
 * re-armed.
 */
236 static void xfrm_timer_handler(unsigned long data)
237 {
238         struct xfrm_state *x = (struct xfrm_state*)data;
239         unsigned long now = get_seconds();
240         long next = LONG_MAX;
241         int warn = 0;
242         int err = 0;
243
244         spin_lock(&x->lock);
245         if (x->km.state == XFRM_STATE_DEAD)
246                 goto out;
247         if (x->km.state == XFRM_STATE_EXPIRED)
248                 goto expired;
            /* Hard limits: time since creation, then time since first use. */
249         if (x->lft.hard_add_expires_seconds) {
250                 long tmo = x->lft.hard_add_expires_seconds +
251                         x->curlft.add_time - now;
252                 if (tmo <= 0)
253                         goto expired;
254                 if (tmo < next)
255                         next = tmo;
256         }
257         if (x->lft.hard_use_expires_seconds) {
                    /* A zero use_time is treated as "now", so the full
                     * interval is still remaining. */
258                 long tmo = x->lft.hard_use_expires_seconds +
259                         (x->curlft.use_time ? : now) - now;
260                 if (tmo <= 0)
261                         goto expired;
262                 if (tmo < next)
263                         next = tmo;
264         }
            /* Soft limits are skipped once the state is already dying, so
             * the soft-expiry notification is not repeated. */
265         if (x->km.dying)
266                 goto resched;
267         if (x->lft.soft_add_expires_seconds) {
268                 long tmo = x->lft.soft_add_expires_seconds +
269                         x->curlft.add_time - now;
270                 if (tmo <= 0)
271                         warn = 1;
272                 else if (tmo < next)
273                         next = tmo;
274         }
275         if (x->lft.soft_use_expires_seconds) {
276                 long tmo = x->lft.soft_use_expires_seconds +
277                         (x->curlft.use_time ? : now) - now;
278                 if (tmo <= 0)
279                         warn = 1;
280                 else if (tmo < next)
281                         next = tmo;
282         }
283
284         x->km.dying = warn;
285         if (warn)
286                 km_state_expired(x, 0, 0);
287 resched:
288         if (next != LONG_MAX)
289                 mod_timer(&x->timer, jiffies + make_jiffies(next));
290
291         goto out;
292
293 expired:
            /* An ACQ state without an SPI is not deleted right away: it is
             * marked EXPIRED, waiters are woken, and the timer is re-armed
             * for 2 seconds, after which this handler takes the delete
             * path below. */
294         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
295                 x->km.state = XFRM_STATE_EXPIRED;
296                 wake_up(&km_waitq);
297                 next = 2;
298                 goto resched;
299         }
300
            /* The key manager gets a hard-expire notification only when the
             * delete succeeded and the state has an SPI. */
301         err = __xfrm_state_delete(x);
302         if (!err && x->id.spi)
303                 km_state_expired(x, 1, 0);
304
305         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
306                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
307
308 out:
309         spin_unlock(&x->lock);
310 }
311
312 static void xfrm_replay_timer_handler(unsigned long data);
313
314 struct xfrm_state *xfrm_state_alloc(void)
315 {
316         struct xfrm_state *x;
317
318         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
319
320         if (x) {
321                 atomic_set(&x->refcnt, 1);
322                 atomic_set(&x->tunnel_users, 0);
323                 INIT_HLIST_NODE(&x->bydst);
324                 INIT_HLIST_NODE(&x->bysrc);
325                 INIT_HLIST_NODE(&x->byspi);
326                 init_timer(&x->timer);
327                 x->timer.function = xfrm_timer_handler;
328                 x->timer.data     = (unsigned long)x;
329                 init_timer(&x->rtimer);
330                 x->rtimer.function = xfrm_replay_timer_handler;
331                 x->rtimer.data     = (unsigned long)x;
332                 x->curlft.add_time = get_seconds();
333                 x->lft.soft_byte_limit = XFRM_INF;
334                 x->lft.soft_packet_limit = XFRM_INF;
335                 x->lft.hard_byte_limit = XFRM_INF;
336                 x->lft.hard_packet_limit = XFRM_INF;
337                 x->replay_maxage = 0;
338                 x->replay_maxdiff = 0;
339                 spin_lock_init(&x->lock);
340         }
341         return x;
342 }
343 EXPORT_SYMBOL(xfrm_state_alloc);
344
345 void __xfrm_state_destroy(struct xfrm_state *x)
346 {
347         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
348
349         spin_lock_bh(&xfrm_state_gc_lock);
350         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
351         spin_unlock_bh(&xfrm_state_gc_lock);
352         schedule_work(&xfrm_state_gc_work);
353 }
354 EXPORT_SYMBOL(__xfrm_state_destroy);
355
356 int __xfrm_state_delete(struct xfrm_state *x)
357 {
358         int err = -ESRCH;
359
360         if (x->km.state != XFRM_STATE_DEAD) {
361                 x->km.state = XFRM_STATE_DEAD;
362                 spin_lock(&xfrm_state_lock);
363                 hlist_del(&x->bydst);
364                 hlist_del(&x->bysrc);
365                 if (x->id.spi)
366                         hlist_del(&x->byspi);
367                 xfrm_state_num--;
368                 spin_unlock(&xfrm_state_lock);
369
370                 /* All xfrm_state objects are created by xfrm_state_alloc.
371                  * The xfrm_state_alloc call gives a reference, and that
372                  * is what we are dropping here.
373                  */
374                 __xfrm_state_put(x);
375                 err = 0;
376         }
377
378         return err;
379 }
380 EXPORT_SYMBOL(__xfrm_state_delete);
381
382 int xfrm_state_delete(struct xfrm_state *x)
383 {
384         int err;
385
386         spin_lock_bh(&x->lock);
387         err = __xfrm_state_delete(x);
388         spin_unlock_bh(&x->lock);
389
390         return err;
391 }
392 EXPORT_SYMBOL(xfrm_state_delete);
393
394 #ifdef CONFIG_SECURITY_NETWORK_XFRM
395 static inline int
396 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
397 {
398         int i, err = 0;
399
400         for (i = 0; i <= xfrm_state_hmask; i++) {
401                 struct hlist_node *entry;
402                 struct xfrm_state *x;
403
404                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
405                         if (xfrm_id_proto_match(x->id.proto, proto) &&
406                            (err = security_xfrm_state_delete(x)) != 0) {
407                                 xfrm_audit_log(audit_info->loginuid,
408                                                audit_info->secid,
409                                                AUDIT_MAC_IPSEC_DELSA,
410                                                0, NULL, x);
411
412                                 return err;
413                         }
414                 }
415         }
416
417         return err;
418 }
419 #else
420 static inline int
421 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
422 {
423         return 0;
424 }
425 #endif
426
427 int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
428 {
429         int i, err = 0;
430
431         spin_lock_bh(&xfrm_state_lock);
432         err = xfrm_state_flush_secctx_check(proto, audit_info);
433         if (err)
434                 goto out;
435
436         for (i = 0; i <= xfrm_state_hmask; i++) {
437                 struct hlist_node *entry;
438                 struct xfrm_state *x;
439 restart:
440                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
441                         if (!xfrm_state_kern(x) &&
442                             xfrm_id_proto_match(x->id.proto, proto)) {
443                                 xfrm_state_hold(x);
444                                 spin_unlock_bh(&xfrm_state_lock);
445
446                                 err = xfrm_state_delete(x);
447                                 xfrm_audit_log(audit_info->loginuid,
448                                                audit_info->secid,
449                                                AUDIT_MAC_IPSEC_DELSA,
450                                                err ? 0 : 1, NULL, x);
451                                 xfrm_state_put(x);
452
453                                 spin_lock_bh(&xfrm_state_lock);
454                                 goto restart;
455                         }
456                 }
457         }
458         err = 0;
459
460 out:
461         spin_unlock_bh(&xfrm_state_lock);
462         wake_up(&km_waitq);
463         return err;
464 }
465 EXPORT_SYMBOL(xfrm_state_flush);
466
467 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
468 {
469         spin_lock_bh(&xfrm_state_lock);
470         si->sadcnt = xfrm_state_num;
471         si->sadhcnt = xfrm_state_hmask;
472         si->sadhmcnt = xfrm_state_hashmax;
473         spin_unlock_bh(&xfrm_state_lock);
474 }
475 EXPORT_SYMBOL(xfrm_sad_getinfo);
476
477 static int
478 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
479                   struct xfrm_tmpl *tmpl,
480                   xfrm_address_t *daddr, xfrm_address_t *saddr,
481                   unsigned short family)
482 {
483         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
484         if (!afinfo)
485                 return -1;
486         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
487         xfrm_state_put_afinfo(afinfo);
488         return 0;
489 }
490
491 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
492 {
493         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
494         struct xfrm_state *x;
495         struct hlist_node *entry;
496
497         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
498                 if (x->props.family != family ||
499                     x->id.spi       != spi ||
500                     x->id.proto     != proto)
501                         continue;
502
503                 switch (family) {
504                 case AF_INET:
505                         if (x->id.daddr.a4 != daddr->a4)
506                                 continue;
507                         break;
508                 case AF_INET6:
509                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
510                                              (struct in6_addr *)
511                                              x->id.daddr.a6))
512                                 continue;
513                         break;
514                 }
515
516                 xfrm_state_hold(x);
517                 return x;
518         }
519
520         return NULL;
521 }
522
523 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
524 {
525         unsigned int h = xfrm_src_hash(daddr, saddr, family);
526         struct xfrm_state *x;
527         struct hlist_node *entry;
528
529         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
530                 if (x->props.family != family ||
531                     x->id.proto     != proto)
532                         continue;
533
534                 switch (family) {
535                 case AF_INET:
536                         if (x->id.daddr.a4 != daddr->a4 ||
537                             x->props.saddr.a4 != saddr->a4)
538                                 continue;
539                         break;
540                 case AF_INET6:
541                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
542                                              (struct in6_addr *)
543                                              x->id.daddr.a6) ||
544                             !ipv6_addr_equal((struct in6_addr *)saddr,
545                                              (struct in6_addr *)
546                                              x->props.saddr.a6))
547                                 continue;
548                         break;
549                 }
550
551                 xfrm_state_hold(x);
552                 return x;
553         }
554
555         return NULL;
556 }
557
558 static inline struct xfrm_state *
559 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
560 {
561         if (use_spi)
562                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
563                                            x->id.proto, family);
564         else
565                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
566                                                   &x->props.saddr,
567                                                   x->id.proto, family);
568 }
569
570 static void xfrm_hash_grow_check(int have_hash_collision)
571 {
572         if (have_hash_collision &&
573             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
574             xfrm_state_num > xfrm_state_hmask)
575                 schedule_work(&xfrm_hash_work);
576 }
577
/*
 * Find (or start acquiring) a state that satisfies template @tmpl for flow
 * @fl between @saddr and @daddr.
 *
 * The by-destination chain is scanned under xfrm_state_lock.  Among VALID
 * states whose selector and security context match, the preferred one is
 * not dying and, on a tie, has the newest add_time.  If nothing usable
 * exists, no acquire is in progress and no matching ERROR/EXPIRED state was
 * seen, a new ACQ-state entry is allocated, the key manager is queried, and
 * on success the entry is linked into the tables with a hard lifetime of
 * sysctl_xfrm_acq_expires seconds.
 *
 * Returns a state with a reference held for the caller, or NULL with *@err
 * set: -EAGAIN when an acquire is already in progress, otherwise the
 * recorded error (-ESRCH, -EEXIST, -ENOMEM or the security hook's error).
 * *@err is not written when a state is returned.
 */
578 struct xfrm_state *
579 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
580                 struct flowi *fl, struct xfrm_tmpl *tmpl,
581                 struct xfrm_policy *pol, int *err,
582                 unsigned short family)
583 {
584         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
585         struct hlist_node *entry;
586         struct xfrm_state *x, *x0;
587         int acquire_in_progress = 0;
588         int error = 0;
589         struct xfrm_state *best = NULL;
590
591         spin_lock_bh(&xfrm_state_lock);
592         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
593                 if (x->props.family == family &&
594                     x->props.reqid == tmpl->reqid &&
595                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
596                     xfrm_state_addr_check(x, daddr, saddr, family) &&
597                     tmpl->mode == x->props.mode &&
598                     tmpl->id.proto == x->id.proto &&
599                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
600                         /* Resolution logic:
601                            1. There is a valid state with matching selector.
602                               Done.
603                            2. Valid state with inappropriate selector. Skip.
604
605                            Entering area of "sysdeps".
606
607                            3. If state is not valid, selector is temporary,
608                               it selects only session which triggered
609                               previous resolution. Key manager will do
610                               something to install a state with proper
611                               selector.
612                          */
613                         if (x->km.state == XFRM_STATE_VALID) {
614                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
615                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
616                                         continue;
                                    /* Prefer a state that is not dying; on a
                                     * tie, prefer the newer add_time. */
617                                 if (!best ||
618                                     best->km.dying > x->km.dying ||
619                                     (best->km.dying == x->km.dying &&
620                                      best->curlft.add_time < x->curlft.add_time))
621                                         best = x;
622                         } else if (x->km.state == XFRM_STATE_ACQ) {
623                                 acquire_in_progress = 1;
624                         } else if (x->km.state == XFRM_STATE_ERROR ||
625                                    x->km.state == XFRM_STATE_EXPIRED) {
626                                 if (xfrm_selector_match(&x->sel, fl, family) &&
627                                     security_xfrm_state_pol_flow_match(x, pol, fl))
628                                         error = -ESRCH;
629                         }
630                 }
631         }
632
633         x = best;
634         if (!x && !error && !acquire_in_progress) {
                    /* A template with a fixed SPI must not collide with a
                     * state that already owns that SPI. */
635                 if (tmpl->id.spi &&
636                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
637                                               tmpl->id.proto, family)) != NULL) {
638                         xfrm_state_put(x0);
639                         error = -EEXIST;
640                         goto out;
641                 }
642                 x = xfrm_state_alloc();
643                 if (x == NULL) {
644                         error = -ENOMEM;
645                         goto out;
646                 }
647                 /* Initialize temporary selector matching only
648                  * to current session. */
                    /* NOTE(review): the return value of xfrm_init_tempsel()
                     * (-1 when no afinfo exists for the family) is ignored. */
649                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
650
651                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
652                 if (error) {
653                         x->km.state = XFRM_STATE_DEAD;
654                         xfrm_state_put(x);
655                         x = NULL;
656                         goto out;
657                 }
658
659                 if (km_query(x, tmpl, pol) == 0) {
                            /* Key manager accepted the query: publish the
                             * larval state and start its expiry timer. */
660                         x->km.state = XFRM_STATE_ACQ;
661                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
662                         h = xfrm_src_hash(daddr, saddr, family);
663                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
664                         if (x->id.spi) {
665                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
666                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
667                         }
668                         x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
669                         x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
670                         add_timer(&x->timer);
671                         xfrm_state_num++;
                            /* A non-NULL bydst.next means the bucket already
                             * held an entry, i.e. a hash collision. */
672                         xfrm_hash_grow_check(x->bydst.next != NULL);
673                 } else {
674                         x->km.state = XFRM_STATE_DEAD;
675                         xfrm_state_put(x);
676                         x = NULL;
677                         error = -ESRCH;
678                 }
679         }
680 out:
            /* The reference taken here is the one handed to the caller. */
681         if (x)
682                 xfrm_state_hold(x);
683         else
684                 *err = acquire_in_progress ? -EAGAIN : error;
685         spin_unlock_bh(&xfrm_state_lock);
686         return x;
687 }
688
689 struct xfrm_state *
690 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
691                     unsigned short family, u8 mode, u8 proto, u32 reqid)
692 {
693         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
694         struct xfrm_state *rx = NULL, *x = NULL;
695         struct hlist_node *entry;
696
697         spin_lock(&xfrm_state_lock);
698         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
699                 if (x->props.family == family &&
700                     x->props.reqid == reqid &&
701                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
702                     xfrm_state_addr_check(x, daddr, saddr, family) &&
703                     mode == x->props.mode &&
704                     proto == x->id.proto &&
705                     x->km.state == XFRM_STATE_VALID) {
706                         rx = x;
707                         break;
708                 }
709         }
710
711         if (rx)
712                 xfrm_state_hold(rx);
713         spin_unlock(&xfrm_state_lock);
714
715
716         return rx;
717 }
718 EXPORT_SYMBOL(xfrm_stateonly_find);
719
720 static void __xfrm_state_insert(struct xfrm_state *x)
721 {
722         unsigned int h;
723
724         x->genid = ++xfrm_state_genid;
725
726         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
727                           x->props.reqid, x->props.family);
728         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
729
730         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
731         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
732
733         if (x->id.spi) {
734                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
735                                   x->props.family);
736
737                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
738         }
739
740         mod_timer(&x->timer, jiffies + HZ);
741         if (x->replay_maxage)
742                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
743
744         wake_up(&km_waitq);
745
746         xfrm_state_num++;
747
748         xfrm_hash_grow_check(x->bydst.next != NULL);
749 }
750
751 /* xfrm_state_lock is held */
752 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
753 {
754         unsigned short family = xnew->props.family;
755         u32 reqid = xnew->props.reqid;
756         struct xfrm_state *x;
757         struct hlist_node *entry;
758         unsigned int h;
759
760         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
761         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
762                 if (x->props.family     == family &&
763                     x->props.reqid      == reqid &&
764                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
765                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
766                         x->genid = xfrm_state_genid;
767         }
768 }
769
770 void xfrm_state_insert(struct xfrm_state *x)
771 {
772         spin_lock_bh(&xfrm_state_lock);
773         __xfrm_state_bump_genids(x);
774         __xfrm_state_insert(x);
775         spin_unlock_bh(&xfrm_state_lock);
776 }
777 EXPORT_SYMBOL(xfrm_state_insert);
778
779 /* xfrm_state_lock is held */
780 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
781 {
782         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
783         struct hlist_node *entry;
784         struct xfrm_state *x;
785
786         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
787                 if (x->props.reqid  != reqid ||
788                     x->props.mode   != mode ||
789                     x->props.family != family ||
790                     x->km.state     != XFRM_STATE_ACQ ||
791                     x->id.spi       != 0 ||
792                     x->id.proto     != proto)
793                         continue;
794
795                 switch (family) {
796                 case AF_INET:
797                         if (x->id.daddr.a4    != daddr->a4 ||
798                             x->props.saddr.a4 != saddr->a4)
799                                 continue;
800                         break;
801                 case AF_INET6:
802                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
803                                              (struct in6_addr *)daddr) ||
804                             !ipv6_addr_equal((struct in6_addr *)
805                                              x->props.saddr.a6,
806                                              (struct in6_addr *)saddr))
807                                 continue;
808                         break;
809                 }
810
811                 xfrm_state_hold(x);
812                 return x;
813         }
814
815         if (!create)
816                 return NULL;
817
818         x = xfrm_state_alloc();
819         if (likely(x)) {
820                 switch (family) {
821                 case AF_INET:
822                         x->sel.daddr.a4 = daddr->a4;
823                         x->sel.saddr.a4 = saddr->a4;
824                         x->sel.prefixlen_d = 32;
825                         x->sel.prefixlen_s = 32;
826                         x->props.saddr.a4 = saddr->a4;
827                         x->id.daddr.a4 = daddr->a4;
828                         break;
829
830                 case AF_INET6:
831                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
832                                        (struct in6_addr *)daddr);
833                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
834                                        (struct in6_addr *)saddr);
835                         x->sel.prefixlen_d = 128;
836                         x->sel.prefixlen_s = 128;
837                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
838                                        (struct in6_addr *)saddr);
839                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
840                                        (struct in6_addr *)daddr);
841                         break;
842                 }
843
844                 x->km.state = XFRM_STATE_ACQ;
845                 x->id.proto = proto;
846                 x->props.family = family;
847                 x->props.mode = mode;
848                 x->props.reqid = reqid;
849                 x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
850                 xfrm_state_hold(x);
851                 x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
852                 add_timer(&x->timer);
853                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
854                 h = xfrm_src_hash(daddr, saddr, family);
855                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
856                 wake_up(&km_waitq);
857
858                 xfrm_state_num++;
859
860                 xfrm_hash_grow_check(x->bydst.next != NULL);
861         }
862
863         return x;
864 }
865
866 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
867
868 int xfrm_state_add(struct xfrm_state *x)
869 {
870         struct xfrm_state *x1;
871         int family;
872         int err;
873         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
874
875         family = x->props.family;
876
877         spin_lock_bh(&xfrm_state_lock);
878
879         x1 = __xfrm_state_locate(x, use_spi, family);
880         if (x1) {
881                 xfrm_state_put(x1);
882                 x1 = NULL;
883                 err = -EEXIST;
884                 goto out;
885         }
886
887         if (use_spi && x->km.seq) {
888                 x1 = __xfrm_find_acq_byseq(x->km.seq);
889                 if (x1 && ((x1->id.proto != x->id.proto) ||
890                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
891                         xfrm_state_put(x1);
892                         x1 = NULL;
893                 }
894         }
895
896         if (use_spi && !x1)
897                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
898                                      x->id.proto,
899                                      &x->id.daddr, &x->props.saddr, 0);
900
901         __xfrm_state_bump_genids(x);
902         __xfrm_state_insert(x);
903         err = 0;
904
905 out:
906         spin_unlock_bh(&xfrm_state_lock);
907
908         if (x1) {
909                 xfrm_state_delete(x1);
910                 xfrm_state_put(x1);
911         }
912
913         return err;
914 }
915 EXPORT_SYMBOL(xfrm_state_add);
916
917 #ifdef CONFIG_XFRM_MIGRATE
918 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
919 {
920         int err = -ENOMEM;
921         struct xfrm_state *x = xfrm_state_alloc();
922         if (!x)
923                 goto error;
924
925         memcpy(&x->id, &orig->id, sizeof(x->id));
926         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
927         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
928         x->props.mode = orig->props.mode;
929         x->props.replay_window = orig->props.replay_window;
930         x->props.reqid = orig->props.reqid;
931         x->props.family = orig->props.family;
932         x->props.saddr = orig->props.saddr;
933
934         if (orig->aalg) {
935                 x->aalg = xfrm_algo_clone(orig->aalg);
936                 if (!x->aalg)
937                         goto error;
938         }
939         x->props.aalgo = orig->props.aalgo;
940
941         if (orig->ealg) {
942                 x->ealg = xfrm_algo_clone(orig->ealg);
943                 if (!x->ealg)
944                         goto error;
945         }
946         x->props.ealgo = orig->props.ealgo;
947
948         if (orig->calg) {
949                 x->calg = xfrm_algo_clone(orig->calg);
950                 if (!x->calg)
951                         goto error;
952         }
953         x->props.calgo = orig->props.calgo;
954
955         if (orig->encap) {
956                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
957                 if (!x->encap)
958                         goto error;
959         }
960
961         if (orig->coaddr) {
962                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
963                                     GFP_KERNEL);
964                 if (!x->coaddr)
965                         goto error;
966         }
967
968         err = xfrm_init_state(x);
969         if (err)
970                 goto error;
971
972         x->props.flags = orig->props.flags;
973
974         x->curlft.add_time = orig->curlft.add_time;
975         x->km.state = orig->km.state;
976         x->km.seq = orig->km.seq;
977
978         return x;
979
980  error:
981         if (errp)
982                 *errp = err;
983         if (x) {
984                 kfree(x->aalg);
985                 kfree(x->ealg);
986                 kfree(x->calg);
987                 kfree(x->encap);
988                 kfree(x->coaddr);
989         }
990         kfree(x);
991         return NULL;
992 }
993 EXPORT_SYMBOL(xfrm_state_clone);
994
995 /* xfrm_state_lock is held */
996 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
997 {
998         unsigned int h;
999         struct xfrm_state *x;
1000         struct hlist_node *entry;
1001
1002         if (m->reqid) {
1003                 h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
1004                                   m->reqid, m->old_family);
1005                 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
1006                         if (x->props.mode != m->mode ||
1007                             x->id.proto != m->proto)
1008                                 continue;
1009                         if (m->reqid && x->props.reqid != m->reqid)
1010                                 continue;
1011                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1012                                           m->old_family) ||
1013                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1014                                           m->old_family))
1015                                 continue;
1016                         xfrm_state_hold(x);
1017                         return x;
1018                 }
1019         } else {
1020                 h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
1021                                   m->old_family);
1022                 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
1023                         if (x->props.mode != m->mode ||
1024                             x->id.proto != m->proto)
1025                                 continue;
1026                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1027                                           m->old_family) ||
1028                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1029                                           m->old_family))
1030                                 continue;
1031                         xfrm_state_hold(x);
1032                         return x;
1033                 }
1034         }
1035
1036         return NULL;
1037 }
1038 EXPORT_SYMBOL(xfrm_migrate_state_find);
1039
1040 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1041                                        struct xfrm_migrate *m)
1042 {
1043         struct xfrm_state *xc;
1044         int err;
1045
1046         xc = xfrm_state_clone(x, &err);
1047         if (!xc)
1048                 return NULL;
1049
1050         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1051         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1052
1053         /* add state */
1054         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1055                 /* a care is needed when the destination address of the
1056                    state is to be updated as it is a part of triplet */
1057                 xfrm_state_insert(xc);
1058         } else {
1059                 if ((err = xfrm_state_add(xc)) < 0)
1060                         goto error;
1061         }
1062
1063         return xc;
1064 error:
1065         kfree(xc);
1066         return NULL;
1067 }
1068 EXPORT_SYMBOL(xfrm_state_migrate);
1069 #endif
1070
1071 int xfrm_state_update(struct xfrm_state *x)
1072 {
1073         struct xfrm_state *x1;
1074         int err;
1075         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1076
1077         spin_lock_bh(&xfrm_state_lock);
1078         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1079
1080         err = -ESRCH;
1081         if (!x1)
1082                 goto out;
1083
1084         if (xfrm_state_kern(x1)) {
1085                 xfrm_state_put(x1);
1086                 err = -EEXIST;
1087                 goto out;
1088         }
1089
1090         if (x1->km.state == XFRM_STATE_ACQ) {
1091                 __xfrm_state_insert(x);
1092                 x = NULL;
1093         }
1094         err = 0;
1095
1096 out:
1097         spin_unlock_bh(&xfrm_state_lock);
1098
1099         if (err)
1100                 return err;
1101
1102         if (!x) {
1103                 xfrm_state_delete(x1);
1104                 xfrm_state_put(x1);
1105                 return 0;
1106         }
1107
1108         err = -EINVAL;
1109         spin_lock_bh(&x1->lock);
1110         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1111                 if (x->encap && x1->encap)
1112                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1113                 if (x->coaddr && x1->coaddr) {
1114                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1115                 }
1116                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1117                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1118                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1119                 x1->km.dying = 0;
1120
1121                 mod_timer(&x1->timer, jiffies + HZ);
1122                 if (x1->curlft.use_time)
1123                         xfrm_state_check_expire(x1);
1124
1125                 err = 0;
1126         }
1127         spin_unlock_bh(&x1->lock);
1128
1129         xfrm_state_put(x1);
1130
1131         return err;
1132 }
1133 EXPORT_SYMBOL(xfrm_state_update);
1134
1135 int xfrm_state_check_expire(struct xfrm_state *x)
1136 {
1137         if (!x->curlft.use_time)
1138                 x->curlft.use_time = get_seconds();
1139
1140         if (x->km.state != XFRM_STATE_VALID)
1141                 return -EINVAL;
1142
1143         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1144             x->curlft.packets >= x->lft.hard_packet_limit) {
1145                 x->km.state = XFRM_STATE_EXPIRED;
1146                 mod_timer(&x->timer, jiffies);
1147                 return -EINVAL;
1148         }
1149
1150         if (!x->km.dying &&
1151             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1152              x->curlft.packets >= x->lft.soft_packet_limit)) {
1153                 x->km.dying = 1;
1154                 km_state_expired(x, 0, 0);
1155         }
1156         return 0;
1157 }
1158 EXPORT_SYMBOL(xfrm_state_check_expire);
1159
1160 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1161 {
1162         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1163                 - skb_headroom(skb);
1164
1165         if (nhead > 0)
1166                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1167
1168         /* Check tail too... */
1169         return 0;
1170 }
1171
/*
 * Run the lifetime check on @x and, if it passes, the headroom check on
 * @skb.  Returns the first negative result, else the headroom result.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
1181 EXPORT_SYMBOL(xfrm_state_check);
1182
1183 struct xfrm_state *
1184 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1185                   unsigned short family)
1186 {
1187         struct xfrm_state *x;
1188
1189         spin_lock_bh(&xfrm_state_lock);
1190         x = __xfrm_state_lookup(daddr, spi, proto, family);
1191         spin_unlock_bh(&xfrm_state_lock);
1192         return x;
1193 }
1194 EXPORT_SYMBOL(xfrm_state_lookup);
1195
1196 struct xfrm_state *
1197 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1198                          u8 proto, unsigned short family)
1199 {
1200         struct xfrm_state *x;
1201
1202         spin_lock_bh(&xfrm_state_lock);
1203         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1204         spin_unlock_bh(&xfrm_state_lock);
1205         return x;
1206 }
1207 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1208
1209 struct xfrm_state *
1210 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1211               xfrm_address_t *daddr, xfrm_address_t *saddr,
1212               int create, unsigned short family)
1213 {
1214         struct xfrm_state *x;
1215
1216         spin_lock_bh(&xfrm_state_lock);
1217         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1218         spin_unlock_bh(&xfrm_state_lock);
1219
1220         return x;
1221 }
1222 EXPORT_SYMBOL(xfrm_find_acq);
1223
1224 #ifdef CONFIG_XFRM_SUB_POLICY
1225 int
1226 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1227                unsigned short family)
1228 {
1229         int err = 0;
1230         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1231         if (!afinfo)
1232                 return -EAFNOSUPPORT;
1233
1234         spin_lock_bh(&xfrm_state_lock);
1235         if (afinfo->tmpl_sort)
1236                 err = afinfo->tmpl_sort(dst, src, n);
1237         spin_unlock_bh(&xfrm_state_lock);
1238         xfrm_state_put_afinfo(afinfo);
1239         return err;
1240 }
1241 EXPORT_SYMBOL(xfrm_tmpl_sort);
1242
1243 int
1244 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1245                 unsigned short family)
1246 {
1247         int err = 0;
1248         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1249         if (!afinfo)
1250                 return -EAFNOSUPPORT;
1251
1252         spin_lock_bh(&xfrm_state_lock);
1253         if (afinfo->state_sort)
1254                 err = afinfo->state_sort(dst, src, n);
1255         spin_unlock_bh(&xfrm_state_lock);
1256         xfrm_state_put_afinfo(afinfo);
1257         return err;
1258 }
1259 EXPORT_SYMBOL(xfrm_state_sort);
1260 #endif
1261
1262 /* Silly enough, but I'm lazy to build resolution list */
1263
1264 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1265 {
1266         int i;
1267
1268         for (i = 0; i <= xfrm_state_hmask; i++) {
1269                 struct hlist_node *entry;
1270                 struct xfrm_state *x;
1271
1272                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1273                         if (x->km.seq == seq &&
1274                             x->km.state == XFRM_STATE_ACQ) {
1275                                 xfrm_state_hold(x);
1276                                 return x;
1277                         }
1278                 }
1279         }
1280         return NULL;
1281 }
1282
1283 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1284 {
1285         struct xfrm_state *x;
1286
1287         spin_lock_bh(&xfrm_state_lock);
1288         x = __xfrm_find_acq_byseq(seq);
1289         spin_unlock_bh(&xfrm_state_lock);
1290         return x;
1291 }
1292 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1293
1294 u32 xfrm_get_acqseq(void)
1295 {
1296         u32 res;
1297         static u32 acqseq;
1298         static DEFINE_SPINLOCK(acqseq_lock);
1299
1300         spin_lock_bh(&acqseq_lock);
1301         res = (++acqseq ? : ++acqseq);
1302         spin_unlock_bh(&acqseq_lock);
1303         return res;
1304 }
1305 EXPORT_SYMBOL(xfrm_get_acqseq);
1306
1307 void
1308 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1309 {
1310         unsigned int h;
1311         struct xfrm_state *x0;
1312
1313         if (x->id.spi)
1314                 return;
1315
1316         if (minspi == maxspi) {
1317                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1318                 if (x0) {
1319                         xfrm_state_put(x0);
1320                         return;
1321                 }
1322                 x->id.spi = minspi;
1323         } else {
1324                 u32 spi = 0;
1325                 u32 low = ntohl(minspi);
1326                 u32 high = ntohl(maxspi);
1327                 for (h=0; h<high-low+1; h++) {
1328                         spi = low + net_random()%(high-low+1);
1329                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1330                         if (x0 == NULL) {
1331                                 x->id.spi = htonl(spi);
1332                                 break;
1333                         }
1334                         xfrm_state_put(x0);
1335                 }
1336         }
1337         if (x->id.spi) {
1338                 spin_lock_bh(&xfrm_state_lock);
1339                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1340                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1341                 spin_unlock_bh(&xfrm_state_lock);
1342                 wake_up(&km_waitq);
1343         }
1344 }
1345 EXPORT_SYMBOL(xfrm_alloc_spi);
1346
1347 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1348                     void *data)
1349 {
1350         int i;
1351         struct xfrm_state *x, *last = NULL;
1352         struct hlist_node *entry;
1353         int count = 0;
1354         int err = 0;
1355
1356         spin_lock_bh(&xfrm_state_lock);
1357         for (i = 0; i <= xfrm_state_hmask; i++) {
1358                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1359                         if (!xfrm_id_proto_match(x->id.proto, proto))
1360                                 continue;
1361                         if (last) {
1362                                 err = func(last, count, data);
1363                                 if (err)
1364                                         goto out;
1365                         }
1366                         last = x;
1367                         count++;
1368                 }
1369         }
1370         if (count == 0) {
1371                 err = -ENOENT;
1372                 goto out;
1373         }
1374         err = func(last, 0, data);
1375 out:
1376         spin_unlock_bh(&xfrm_state_lock);
1377         return err;
1378 }
1379 EXPORT_SYMBOL(xfrm_state_walk);
1380
1381
1382 void xfrm_replay_notify(struct xfrm_state *x, int event)
1383 {
1384         struct km_event c;
1385         /* we send notify messages in case
1386          *  1. we updated on of the sequence numbers, and the seqno difference
1387          *     is at least x->replay_maxdiff, in this case we also update the
1388          *     timeout of our timer function
1389          *  2. if x->replay_maxage has elapsed since last update,
1390          *     and there were changes
1391          *
1392          *  The state structure must be locked!
1393          */
1394
1395         switch (event) {
1396         case XFRM_REPLAY_UPDATE:
1397                 if (x->replay_maxdiff &&
1398                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1399                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1400                         if (x->xflags & XFRM_TIME_DEFER)
1401                                 event = XFRM_REPLAY_TIMEOUT;
1402                         else
1403                                 return;
1404                 }
1405
1406                 break;
1407
1408         case XFRM_REPLAY_TIMEOUT:
1409                 if ((x->replay.seq == x->preplay.seq) &&
1410                     (x->replay.bitmap == x->preplay.bitmap) &&
1411                     (x->replay.oseq == x->preplay.oseq)) {
1412                         x->xflags |= XFRM_TIME_DEFER;
1413                         return;
1414                 }
1415
1416                 break;
1417         }
1418
1419         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1420         c.event = XFRM_MSG_NEWAE;
1421         c.data.aevent = event;
1422         km_state_notify(x, &c);
1423
1424         if (x->replay_maxage &&
1425             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1426                 x->xflags &= ~XFRM_TIME_DEFER;
1427 }
1428 EXPORT_SYMBOL(xfrm_replay_notify);
1429
1430 static void xfrm_replay_timer_handler(unsigned long data)
1431 {
1432         struct xfrm_state *x = (struct xfrm_state*)data;
1433
1434         spin_lock(&x->lock);
1435
1436         if (x->km.state == XFRM_STATE_VALID) {
1437                 if (xfrm_aevent_is_on())
1438                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1439                 else
1440                         x->xflags |= XFRM_TIME_DEFER;
1441         }
1442
1443         spin_unlock(&x->lock);
1444 }
1445
1446 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1447 {
1448         u32 diff;
1449         u32 seq = ntohl(net_seq);
1450
1451         if (unlikely(seq == 0))
1452                 return -EINVAL;
1453
1454         if (likely(seq > x->replay.seq))
1455                 return 0;
1456
1457         diff = x->replay.seq - seq;
1458         if (diff >= min_t(unsigned int, x->props.replay_window,
1459                           sizeof(x->replay.bitmap) * 8)) {
1460                 x->stats.replay_window++;
1461                 return -EINVAL;
1462         }
1463
1464         if (x->replay.bitmap & (1U << diff)) {
1465                 x->stats.replay++;
1466                 return -EINVAL;
1467         }
1468         return 0;
1469 }
1470 EXPORT_SYMBOL(xfrm_replay_check);
1471
1472 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1473 {
1474         u32 diff;
1475         u32 seq = ntohl(net_seq);
1476
1477         if (seq > x->replay.seq) {
1478                 diff = seq - x->replay.seq;
1479                 if (diff < x->props.replay_window)
1480                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1481                 else
1482                         x->replay.bitmap = 1;
1483                 x->replay.seq = seq;
1484         } else {
1485                 diff = x->replay.seq - seq;
1486                 x->replay.bitmap |= (1U << diff);
1487         }
1488
1489         if (xfrm_aevent_is_on())
1490                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1491 }
1492 EXPORT_SYMBOL(xfrm_replay_advance);
1493
1494 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1495 static DEFINE_RWLOCK(xfrm_km_lock);
1496
1497 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1498 {
1499         struct xfrm_mgr *km;
1500
1501         read_lock(&xfrm_km_lock);
1502         list_for_each_entry(km, &xfrm_km_list, list)
1503                 if (km->notify_policy)
1504                         km->notify_policy(xp, dir, c);
1505         read_unlock(&xfrm_km_lock);
1506 }
1507
1508 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1509 {
1510         struct xfrm_mgr *km;
1511         read_lock(&xfrm_km_lock);
1512         list_for_each_entry(km, &xfrm_km_list, list)
1513                 if (km->notify)
1514                         km->notify(x, c);
1515         read_unlock(&xfrm_km_lock);
1516 }
1517
1518 EXPORT_SYMBOL(km_policy_notify);
1519 EXPORT_SYMBOL(km_state_notify);
1520
1521 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1522 {
1523         struct km_event c;
1524
1525         c.data.hard = hard;
1526         c.pid = pid;
1527         c.event = XFRM_MSG_EXPIRE;
1528         km_state_notify(x, &c);
1529
1530         if (hard)
1531                 wake_up(&km_waitq);
1532 }
1533
1534 EXPORT_SYMBOL(km_state_expired);
1535 /*
1536  * We send to all registered managers regardless of failure
1537  * We are happy with one success
1538 */
1539 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1540 {
1541         int err = -EINVAL, acqret;
1542         struct xfrm_mgr *km;
1543
1544         read_lock(&xfrm_km_lock);
1545         list_for_each_entry(km, &xfrm_km_list, list) {
1546                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1547                 if (!acqret)
1548                         err = acqret;
1549         }
1550         read_unlock(&xfrm_km_lock);
1551         return err;
1552 }
1553 EXPORT_SYMBOL(km_query);
1554
1555 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1556 {
1557         int err = -EINVAL;
1558         struct xfrm_mgr *km;
1559
1560         read_lock(&xfrm_km_lock);
1561         list_for_each_entry(km, &xfrm_km_list, list) {
1562                 if (km->new_mapping)
1563                         err = km->new_mapping(x, ipaddr, sport);
1564                 if (!err)
1565                         break;
1566         }
1567         read_unlock(&xfrm_km_lock);
1568         return err;
1569 }
1570 EXPORT_SYMBOL(km_new_mapping);
1571
1572 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1573 {
1574         struct km_event c;
1575
1576         c.data.hard = hard;
1577         c.pid = pid;
1578         c.event = XFRM_MSG_POLEXPIRE;
1579         km_policy_notify(pol, dir, &c);
1580
1581         if (hard)
1582                 wake_up(&km_waitq);
1583 }
1584 EXPORT_SYMBOL(km_policy_expired);
1585
1586 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1587                struct xfrm_migrate *m, int num_migrate)
1588 {
1589         int err = -EINVAL;
1590         int ret;
1591         struct xfrm_mgr *km;
1592
1593         read_lock(&xfrm_km_lock);
1594         list_for_each_entry(km, &xfrm_km_list, list) {
1595                 if (km->migrate) {
1596                         ret = km->migrate(sel, dir, type, m, num_migrate);
1597                         if (!ret)
1598                                 err = ret;
1599                 }
1600         }
1601         read_unlock(&xfrm_km_lock);
1602         return err;
1603 }
1604 EXPORT_SYMBOL(km_migrate);
1605
1606 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1607 {
1608         int err = -EINVAL;
1609         int ret;
1610         struct xfrm_mgr *km;
1611
1612         read_lock(&xfrm_km_lock);
1613         list_for_each_entry(km, &xfrm_km_list, list) {
1614                 if (km->report) {
1615                         ret = km->report(proto, sel, addr);
1616                         if (!ret)
1617                                 err = ret;
1618                 }
1619         }
1620         read_unlock(&xfrm_km_lock);
1621         return err;
1622 }
1623 EXPORT_SYMBOL(km_report);
1624
1625 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1626 {
1627         int err;
1628         u8 *data;
1629         struct xfrm_mgr *km;
1630         struct xfrm_policy *pol = NULL;
1631
1632         if (optlen <= 0 || optlen > PAGE_SIZE)
1633                 return -EMSGSIZE;
1634
1635         data = kmalloc(optlen, GFP_KERNEL);
1636         if (!data)
1637                 return -ENOMEM;
1638
1639         err = -EFAULT;
1640         if (copy_from_user(data, optval, optlen))
1641                 goto out;
1642
1643         err = -EINVAL;
1644         read_lock(&xfrm_km_lock);
1645         list_for_each_entry(km, &xfrm_km_list, list) {
1646                 pol = km->compile_policy(sk, optname, data,
1647                                          optlen, &err);
1648                 if (err >= 0)
1649                         break;
1650         }
1651         read_unlock(&xfrm_km_lock);
1652
1653         if (err >= 0) {
1654                 xfrm_sk_policy_insert(sk, err, pol);
1655                 xfrm_pol_put(pol);
1656                 err = 0;
1657         }
1658
1659 out:
1660         kfree(data);
1661         return err;
1662 }
1663 EXPORT_SYMBOL(xfrm_user_policy);
1664
1665 int xfrm_register_km(struct xfrm_mgr *km)
1666 {
1667         write_lock_bh(&xfrm_km_lock);
1668         list_add_tail(&km->list, &xfrm_km_list);
1669         write_unlock_bh(&xfrm_km_lock);
1670         return 0;
1671 }
1672 EXPORT_SYMBOL(xfrm_register_km);
1673
1674 int xfrm_unregister_km(struct xfrm_mgr *km)
1675 {
1676         write_lock_bh(&xfrm_km_lock);
1677         list_del(&km->list);
1678         write_unlock_bh(&xfrm_km_lock);
1679         return 0;
1680 }
1681 EXPORT_SYMBOL(xfrm_unregister_km);
1682
1683 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1684 {
1685         int err = 0;
1686         if (unlikely(afinfo == NULL))
1687                 return -EINVAL;
1688         if (unlikely(afinfo->family >= NPROTO))
1689                 return -EAFNOSUPPORT;
1690         write_lock_bh(&xfrm_state_afinfo_lock);
1691         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1692                 err = -ENOBUFS;
1693         else
1694                 xfrm_state_afinfo[afinfo->family] = afinfo;
1695         write_unlock_bh(&xfrm_state_afinfo_lock);
1696         return err;
1697 }
1698 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1699
1700 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1701 {
1702         int err = 0;
1703         if (unlikely(afinfo == NULL))
1704                 return -EINVAL;
1705         if (unlikely(afinfo->family >= NPROTO))
1706                 return -EAFNOSUPPORT;
1707         write_lock_bh(&xfrm_state_afinfo_lock);
1708         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1709                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1710                         err = -EINVAL;
1711                 else
1712                         xfrm_state_afinfo[afinfo->family] = NULL;
1713         }
1714         write_unlock_bh(&xfrm_state_afinfo_lock);
1715         return err;
1716 }
1717 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1718
1719 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1720 {
1721         struct xfrm_state_afinfo *afinfo;
1722         if (unlikely(family >= NPROTO))
1723                 return NULL;
1724         read_lock(&xfrm_state_afinfo_lock);
1725         afinfo = xfrm_state_afinfo[family];
1726         if (unlikely(!afinfo))
1727                 read_unlock(&xfrm_state_afinfo_lock);
1728         return afinfo;
1729 }
1730
1731 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1732 {
1733         read_unlock(&xfrm_state_afinfo_lock);
1734 }
1735
1736 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1737 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1738
1739 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1740 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1741 {
1742         if (x->tunnel) {
1743                 struct xfrm_state *t = x->tunnel;
1744
1745                 if (atomic_read(&t->tunnel_users) == 2)
1746                         xfrm_state_delete(t);
1747                 atomic_dec(&t->tunnel_users);
1748                 xfrm_state_put(t);
1749                 x->tunnel = NULL;
1750         }
1751 }
1752 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1753
1754 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1755 {
1756         int res;
1757
1758         spin_lock_bh(&x->lock);
1759         if (x->km.state == XFRM_STATE_VALID &&
1760             x->type && x->type->get_mtu)
1761                 res = x->type->get_mtu(x, mtu);
1762         else
1763                 res = mtu - x->props.header_len;
1764         spin_unlock_bh(&x->lock);
1765         return res;
1766 }
1767
1768 int xfrm_init_state(struct xfrm_state *x)
1769 {
1770         struct xfrm_state_afinfo *afinfo;
1771         int family = x->props.family;
1772         int err;
1773
1774         err = -EAFNOSUPPORT;
1775         afinfo = xfrm_state_get_afinfo(family);
1776         if (!afinfo)
1777                 goto error;
1778
1779         err = 0;
1780         if (afinfo->init_flags)
1781                 err = afinfo->init_flags(x);
1782
1783         xfrm_state_put_afinfo(afinfo);
1784
1785         if (err)
1786                 goto error;
1787
1788         err = -EPROTONOSUPPORT;
1789         x->type = xfrm_get_type(x->id.proto, family);
1790         if (x->type == NULL)
1791                 goto error;
1792
1793         err = x->type->init_state(x);
1794         if (err)
1795                 goto error;
1796
1797         x->mode = xfrm_get_mode(x->props.mode, family);
1798         if (x->mode == NULL)
1799                 goto error;
1800
1801         x->km.state = XFRM_STATE_VALID;
1802
1803 error:
1804         return err;
1805 }
1806
1807 EXPORT_SYMBOL(xfrm_init_state);
1808
1809 void __init xfrm_state_init(void)
1810 {
1811         unsigned int sz;
1812
1813         sz = sizeof(struct hlist_head) * 8;
1814
1815         xfrm_state_bydst = xfrm_hash_alloc(sz);
1816         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1817         xfrm_state_byspi = xfrm_hash_alloc(sz);
1818         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1819                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1820         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1821
1822         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1823 }
1824