1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
37 int sysctl_xfrm_larval_drop __read_mostly;
38
39 #ifdef CONFIG_XFRM_STATISTICS
40 DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41 EXPORT_SYMBOL(xfrm_statistics);
42 #endif
43
44 DEFINE_MUTEX(xfrm_cfg_mutex);
45 EXPORT_SYMBOL(xfrm_cfg_mutex);
46
47 static DEFINE_RWLOCK(xfrm_policy_lock);
48
49 static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX];
50 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
51 EXPORT_SYMBOL(xfrm_policy_count);
52
53 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
54 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
55
56 static struct kmem_cache *xfrm_dst_cache __read_mostly;
57
58 static struct work_struct xfrm_policy_gc_work;
59 static HLIST_HEAD(xfrm_policy_gc_list);
60 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
61
62 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
63 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
64 static void xfrm_init_pmtu(struct dst_entry *dst);
65
66 static inline int
67 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
68 {
69         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
70                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
71                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
72                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
73                 (fl->proto == sel->proto || !sel->proto) &&
74                 (fl->oif == sel->ifindex || !sel->ifindex);
75 }
76
77 static inline int
78 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
79 {
80         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
81                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
82                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
83                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
84                 (fl->proto == sel->proto || !sel->proto) &&
85                 (fl->oif == sel->ifindex || !sel->ifindex);
86 }
87
88 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
89                     unsigned short family)
90 {
91         switch (family) {
92         case AF_INET:
93                 return __xfrm4_selector_match(sel, fl);
94         case AF_INET6:
95                 return __xfrm6_selector_match(sel, fl);
96         }
97         return 0;
98 }
99
100 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
101                                                 int family)
102 {
103         xfrm_address_t *saddr = &x->props.saddr;
104         xfrm_address_t *daddr = &x->id.daddr;
105         struct xfrm_policy_afinfo *afinfo;
106         struct dst_entry *dst;
107
108         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
109                 saddr = x->coaddr;
110         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
111                 daddr = x->coaddr;
112
113         afinfo = xfrm_policy_get_afinfo(family);
114         if (unlikely(afinfo == NULL))
115                 return ERR_PTR(-EAFNOSUPPORT);
116
117         dst = afinfo->dst_lookup(tos, saddr, daddr);
118         xfrm_policy_put_afinfo(afinfo);
119         return dst;
120 }
121
122 static inline unsigned long make_jiffies(long secs)
123 {
124         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
125                 return MAX_SCHEDULE_TIMEOUT-1;
126         else
127                 return secs*HZ;
128 }
129
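/* Per-policy timer: enforce the soft and hard add/use lifetimes, warning
 * the key manager on soft expiry and deleting the policy on hard expiry.
 */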
130 static void xfrm_policy_timer(unsigned long data)
131 {
132         struct xfrm_policy *xp = (struct xfrm_policy*)data;
133         unsigned long now = get_seconds();
134         long next = LONG_MAX;
135         int warn = 0;
136         int dir;
137
138         read_lock(&xp->lock);
139
140         if (xp->dead)
141                 goto out;
142
143         dir = xfrm_policy_id2dir(xp->index);
144
145         if (xp->lft.hard_add_expires_seconds) {
146                 long tmo = xp->lft.hard_add_expires_seconds +
147                         xp->curlft.add_time - now;
148                 if (tmo <= 0)
149                         goto expired;
150                 if (tmo < next)
151                         next = tmo;
152         }
153         if (xp->lft.hard_use_expires_seconds) {
154                 long tmo = xp->lft.hard_use_expires_seconds +
155                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
156                 if (tmo <= 0)
157                         goto expired;
158                 if (tmo < next)
159                         next = tmo;
160         }
161         if (xp->lft.soft_add_expires_seconds) {
162                 long tmo = xp->lft.soft_add_expires_seconds +
163                         xp->curlft.add_time - now;
164                 if (tmo <= 0) {
165                         warn = 1;
166                         tmo = XFRM_KM_TIMEOUT;
167                 }
168                 if (tmo < next)
169                         next = tmo;
170         }
171         if (xp->lft.soft_use_expires_seconds) {
172                 long tmo = xp->lft.soft_use_expires_seconds +
173                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
174                 if (tmo <= 0) {
175                         warn = 1;
176                         tmo = XFRM_KM_TIMEOUT;
177                 }
178                 if (tmo < next)
179                         next = tmo;
180         }
181
182         if (warn)
183                 km_policy_expired(xp, dir, 0, 0);
184         if (next != LONG_MAX &&
185             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
186                 xfrm_pol_hold(xp);
187
188 out:
189         read_unlock(&xp->lock);
190         xfrm_pol_put(xp);
191         return;
192
193 expired:
194         read_unlock(&xp->lock);
195         if (!xfrm_policy_delete(xp, dir))
196                 km_policy_expired(xp, dir, 1, 0);
197         xfrm_pol_put(xp);
198 }
199
200
201 /* Allocate xfrm_policy. Not used here; it is meant to be used by pfkeyv2
202  * SPD calls.
203  */
204
205 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
206 {
207         struct xfrm_policy *policy;
208
209         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
210
211         if (policy) {
212                 INIT_LIST_HEAD(&policy->bytype);
213                 INIT_HLIST_NODE(&policy->bydst);
214                 INIT_HLIST_NODE(&policy->byidx);
215                 rwlock_init(&policy->lock);
216                 atomic_set(&policy->refcnt, 1);
217                 setup_timer(&policy->timer, xfrm_policy_timer,
218                                 (unsigned long)policy);
219         }
220         return policy;
221 }
222 EXPORT_SYMBOL(xfrm_policy_alloc);
223
224 /* Destroy xfrm_policy: all descendant resources must have been released by this point. */
225
226 void xfrm_policy_destroy(struct xfrm_policy *policy)
227 {
228         BUG_ON(!policy->dead);
229
230         BUG_ON(policy->bundles);
231
232         if (del_timer(&policy->timer))
233                 BUG();
234
235         write_lock_bh(&xfrm_policy_lock);
236         list_del(&policy->bytype);
237         write_unlock_bh(&xfrm_policy_lock);
238
239         security_xfrm_policy_free(policy);
240         kfree(policy);
241 }
242 EXPORT_SYMBOL(xfrm_policy_destroy);
243
244 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
245 {
246         struct dst_entry *dst;
247
248         while ((dst = policy->bundles) != NULL) {
249                 policy->bundles = dst->next;
250                 dst_free(dst);
251         }
252
253         if (del_timer(&policy->timer))
254                 atomic_dec(&policy->refcnt);
255
256         if (atomic_read(&policy->refcnt) > 1)
257                 flow_cache_flush();
258
259         xfrm_pol_put(policy);
260 }
261
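/* Deferred GC: detach the pending list under the GC lock, then free each
 * dead policy's cached bundles and drop its reference.
 */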
262 static void xfrm_policy_gc_task(struct work_struct *work)
263 {
264         struct xfrm_policy *policy;
265         struct hlist_node *entry, *tmp;
266         struct hlist_head gc_list;
267
268         spin_lock_bh(&xfrm_policy_gc_lock);
269         gc_list.first = xfrm_policy_gc_list.first;
270         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
271         spin_unlock_bh(&xfrm_policy_gc_lock);
272
273         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
274                 xfrm_policy_gc_kill(policy);
275 }
276
277 /* Rule must be locked. Release descendant resources, announce the
278  * entry dead. The rule must already have been unlinked from the lists.
279  */
280
281 static void xfrm_policy_kill(struct xfrm_policy *policy)
282 {
283         int dead;
284
285         write_lock_bh(&policy->lock);
286         dead = policy->dead;
287         policy->dead = 1;
288         write_unlock_bh(&policy->lock);
289
290         if (unlikely(dead)) {
291                 WARN_ON(1);
292                 return;
293         }
294
295         spin_lock(&xfrm_policy_gc_lock);
296         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
297         spin_unlock(&xfrm_policy_gc_lock);
298
299         schedule_work(&xfrm_policy_gc_work);
300 }
301
302 struct xfrm_policy_hash {
303         struct hlist_head       *table;
304         unsigned int            hmask;
305 };
306
307 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
308 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
309 static struct hlist_head *xfrm_policy_byidx __read_mostly;
310 static unsigned int xfrm_idx_hmask __read_mostly;
311 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
312
313 static inline unsigned int idx_hash(u32 index)
314 {
315         return __idx_hash(index, xfrm_idx_hmask);
316 }
317
318 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
319 {
320         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
321         unsigned int hash = __sel_hash(sel, family, hmask);
322
323         return (hash == hmask + 1 ?
324                 &xfrm_policy_inexact[dir] :
325                 xfrm_policy_bydst[dir].table + hash);
326 }
327
328 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
329 {
330         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
331         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
332
333         return xfrm_policy_bydst[dir].table + hash;
334 }
335
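/* Move the policies on one bydst chain into the new hash table, keeping
 * entries that land in the same new bucket in their original order.
 */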
336 static void xfrm_dst_hash_transfer(struct hlist_head *list,
337                                    struct hlist_head *ndsttable,
338                                    unsigned int nhashmask)
339 {
340         struct hlist_node *entry, *tmp, *entry0 = NULL;
341         struct xfrm_policy *pol;
342         unsigned int h0 = 0;
343
344 redo:
345         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
346                 unsigned int h;
347
348                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
349                                 pol->family, nhashmask);
350                 if (!entry0) {
351                         hlist_del(entry);
352                         hlist_add_head(&pol->bydst, ndsttable+h);
353                         h0 = h;
354                 } else {
355                         if (h != h0)
356                                 continue;
357                         hlist_del(entry);
358                         hlist_add_after(entry0, &pol->bydst);
359                 }
360                 entry0 = entry;
361         }
362         if (!hlist_empty(list)) {
363                 entry0 = NULL;
364                 goto redo;
365         }
366 }
367
368 static void xfrm_idx_hash_transfer(struct hlist_head *list,
369                                    struct hlist_head *nidxtable,
370                                    unsigned int nhashmask)
371 {
372         struct hlist_node *entry, *tmp;
373         struct xfrm_policy *pol;
374
375         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
376                 unsigned int h;
377
378                 h = __idx_hash(pol->index, nhashmask);
379                 hlist_add_head(&pol->byidx, nidxtable+h);
380         }
381 }
382
383 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
384 {
385         return ((old_hmask + 1) << 1) - 1;
386 }
387
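/* Double the bydst hash table for one direction and rehash all of its
 * policies into the new table under the policy lock.
 */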
388 static void xfrm_bydst_resize(int dir)
389 {
390         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
391         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
392         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
393         struct hlist_head *odst = xfrm_policy_bydst[dir].table;
394         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
395         int i;
396
397         if (!ndst)
398                 return;
399
400         write_lock_bh(&xfrm_policy_lock);
401
402         for (i = hmask; i >= 0; i--)
403                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
404
405         xfrm_policy_bydst[dir].table = ndst;
406         xfrm_policy_bydst[dir].hmask = nhashmask;
407
408         write_unlock_bh(&xfrm_policy_lock);
409
410         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
411 }
412
413 static void xfrm_byidx_resize(int total)
414 {
415         unsigned int hmask = xfrm_idx_hmask;
416         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
417         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
418         struct hlist_head *oidx = xfrm_policy_byidx;
419         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
420         int i;
421
422         if (!nidx)
423                 return;
424
425         write_lock_bh(&xfrm_policy_lock);
426
427         for (i = hmask; i >= 0; i--)
428                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
429
430         xfrm_policy_byidx = nidx;
431         xfrm_idx_hmask = nhashmask;
432
433         write_unlock_bh(&xfrm_policy_lock);
434
435         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
436 }
437
438 static inline int xfrm_bydst_should_resize(int dir, int *total)
439 {
440         unsigned int cnt = xfrm_policy_count[dir];
441         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
442
443         if (total)
444                 *total += cnt;
445
446         if ((hmask + 1) < xfrm_policy_hashmax &&
447             cnt > hmask)
448                 return 1;
449
450         return 0;
451 }
452
453 static inline int xfrm_byidx_should_resize(int total)
454 {
455         unsigned int hmask = xfrm_idx_hmask;
456
457         if ((hmask + 1) < xfrm_policy_hashmax &&
458             total > hmask)
459                 return 1;
460
461         return 0;
462 }
463
464 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
465 {
466         read_lock_bh(&xfrm_policy_lock);
467         si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
468         si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
469         si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
470         si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
471         si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
472         si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
473         si->spdhcnt = xfrm_idx_hmask;
474         si->spdhmcnt = xfrm_policy_hashmax;
475         read_unlock_bh(&xfrm_policy_lock);
476 }
477 EXPORT_SYMBOL(xfrm_spd_getinfo);
478
479 static DEFINE_MUTEX(hash_resize_mutex);
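/* Deferred resize: grow any bydst table whose policy count exceeds its
 * hash mask, then grow the byidx table if the total count warrants it.
 */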
480 static void xfrm_hash_resize(struct work_struct *__unused)
481 {
482         int dir, total;
483
484         mutex_lock(&hash_resize_mutex);
485
486         total = 0;
487         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
488                 if (xfrm_bydst_should_resize(dir, &total))
489                         xfrm_bydst_resize(dir);
490         }
491         if (xfrm_byidx_should_resize(total))
492                 xfrm_byidx_resize(total);
493
494         mutex_unlock(&hash_resize_mutex);
495 }
496
497 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
498
499 /* Generate new index... KAME seems to generate them ordered by cost,
500  * at the price of completely unpredictable rule ordering. This will not pass. */
501 static u32 xfrm_gen_index(u8 type, int dir)
502 {
503         static u32 idx_generator;
504
505         for (;;) {
506                 struct hlist_node *entry;
507                 struct hlist_head *list;
508                 struct xfrm_policy *p;
509                 u32 idx;
510                 int found;
511
512                 idx = (idx_generator | dir);
513                 idx_generator += 8;
514                 if (idx == 0)
515                         idx = 8;
516                 list = xfrm_policy_byidx + idx_hash(idx);
517                 found = 0;
518                 hlist_for_each_entry(p, entry, list, byidx) {
519                         if (p->index == idx) {
520                                 found = 1;
521                                 break;
522                         }
523                 }
524                 if (!found)
525                         return idx;
526         }
527 }
528
529 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
530 {
531         u32 *p1 = (u32 *) s1;
532         u32 *p2 = (u32 *) s2;
533         int len = sizeof(struct xfrm_selector) / sizeof(u32);
534         int i;
535
536         for (i = 0; i < len; i++) {
537                 if (p1[i] != p2[i])
538                         return 1;
539         }
540
541         return 0;
542 }
543
544 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
545 {
546         struct xfrm_policy *pol;
547         struct xfrm_policy *delpol;
548         struct hlist_head *chain;
549         struct hlist_node *entry, *newpos;
550         struct dst_entry *gc_list;
551
552         write_lock_bh(&xfrm_policy_lock);
553         chain = policy_hash_bysel(&policy->selector, policy->family, dir);
554         delpol = NULL;
555         newpos = NULL;
556         hlist_for_each_entry(pol, entry, chain, bydst) {
557                 if (pol->type == policy->type &&
558                     !selector_cmp(&pol->selector, &policy->selector) &&
559                     xfrm_sec_ctx_match(pol->security, policy->security) &&
560                     !WARN_ON(delpol)) {
561                         if (excl) {
562                                 write_unlock_bh(&xfrm_policy_lock);
563                                 return -EEXIST;
564                         }
565                         delpol = pol;
566                         if (policy->priority > pol->priority)
567                                 continue;
568                 } else if (policy->priority >= pol->priority) {
569                         newpos = &pol->bydst;
570                         continue;
571                 }
572                 if (delpol)
573                         break;
574         }
575         if (newpos)
576                 hlist_add_after(newpos, &policy->bydst);
577         else
578                 hlist_add_head(&policy->bydst, chain);
579         xfrm_pol_hold(policy);
580         xfrm_policy_count[dir]++;
581         atomic_inc(&flow_cache_genid);
582         if (delpol) {
583                 hlist_del(&delpol->bydst);
584                 hlist_del(&delpol->byidx);
585                 xfrm_policy_count[dir]--;
586         }
587         policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
588         hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
589         policy->curlft.add_time = get_seconds();
590         policy->curlft.use_time = 0;
591         if (!mod_timer(&policy->timer, jiffies + HZ))
592                 xfrm_pol_hold(policy);
593         list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]);
594         write_unlock_bh(&xfrm_policy_lock);
595
596         if (delpol)
597                 xfrm_policy_kill(delpol);
598         else if (xfrm_bydst_should_resize(dir, NULL))
599                 schedule_work(&xfrm_hash_work);
600
601         read_lock_bh(&xfrm_policy_lock);
602         gc_list = NULL;
603         entry = &policy->bydst;
604         hlist_for_each_entry_continue(policy, entry, bydst) {
605                 struct dst_entry *dst;
606
607                 write_lock(&policy->lock);
608                 dst = policy->bundles;
609                 if (dst) {
610                         struct dst_entry *tail = dst;
611                         while (tail->next)
612                                 tail = tail->next;
613                         tail->next = gc_list;
614                         gc_list = dst;
615
616                         policy->bundles = NULL;
617                 }
618                 write_unlock(&policy->lock);
619         }
620         read_unlock_bh(&xfrm_policy_lock);
621
622         while (gc_list) {
623                 struct dst_entry *dst = gc_list;
624
625                 gc_list = dst->next;
626                 dst_free(dst);
627         }
628
629         return 0;
630 }
631 EXPORT_SYMBOL(xfrm_policy_insert);
632
633 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
634                                           struct xfrm_selector *sel,
635                                           struct xfrm_sec_ctx *ctx, int delete,
636                                           int *err)
637 {
638         struct xfrm_policy *pol, *ret;
639         struct hlist_head *chain;
640         struct hlist_node *entry;
641
642         *err = 0;
643         write_lock_bh(&xfrm_policy_lock);
644         chain = policy_hash_bysel(sel, sel->family, dir);
645         ret = NULL;
646         hlist_for_each_entry(pol, entry, chain, bydst) {
647                 if (pol->type == type &&
648                     !selector_cmp(sel, &pol->selector) &&
649                     xfrm_sec_ctx_match(ctx, pol->security)) {
650                         xfrm_pol_hold(pol);
651                         if (delete) {
652                                 *err = security_xfrm_policy_delete(pol);
653                                 if (*err) {
654                                         write_unlock_bh(&xfrm_policy_lock);
655                                         return pol;
656                                 }
657                                 hlist_del(&pol->bydst);
658                                 hlist_del(&pol->byidx);
659                                 xfrm_policy_count[dir]--;
660                         }
661                         ret = pol;
662                         break;
663                 }
664         }
665         write_unlock_bh(&xfrm_policy_lock);
666
667         if (ret && delete) {
668                 atomic_inc(&flow_cache_genid);
669                 xfrm_policy_kill(ret);
670         }
671         return ret;
672 }
673 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
674
675 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
676                                      int *err)
677 {
678         struct xfrm_policy *pol, *ret;
679         struct hlist_head *chain;
680         struct hlist_node *entry;
681
682         *err = -ENOENT;
683         if (xfrm_policy_id2dir(id) != dir)
684                 return NULL;
685
686         *err = 0;
687         write_lock_bh(&xfrm_policy_lock);
688         chain = xfrm_policy_byidx + idx_hash(id);
689         ret = NULL;
690         hlist_for_each_entry(pol, entry, chain, byidx) {
691                 if (pol->type == type && pol->index == id) {
692                         xfrm_pol_hold(pol);
693                         if (delete) {
694                                 *err = security_xfrm_policy_delete(pol);
695                                 if (*err) {
696                                         write_unlock_bh(&xfrm_policy_lock);
697                                         return pol;
698                                 }
699                                 hlist_del(&pol->bydst);
700                                 hlist_del(&pol->byidx);
701                                 xfrm_policy_count[dir]--;
702                         }
703                         ret = pol;
704                         break;
705                 }
706         }
707         write_unlock_bh(&xfrm_policy_lock);
708
709         if (ret && delete) {
710                 atomic_inc(&flow_cache_genid);
711                 xfrm_policy_kill(ret);
712         }
713         return ret;
714 }
715 EXPORT_SYMBOL(xfrm_policy_byid);
716
717 #ifdef CONFIG_SECURITY_NETWORK_XFRM
718 static inline int
719 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
720 {
721         int dir, err = 0;
722
723         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
724                 struct xfrm_policy *pol;
725                 struct hlist_node *entry;
726                 int i;
727
728                 hlist_for_each_entry(pol, entry,
729                                      &xfrm_policy_inexact[dir], bydst) {
730                         if (pol->type != type)
731                                 continue;
732                         err = security_xfrm_policy_delete(pol);
733                         if (err) {
734                                 xfrm_audit_policy_delete(pol, 0,
735                                                          audit_info->loginuid,
736                                                          audit_info->secid);
737                                 return err;
738                         }
739                 }
740                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
741                         hlist_for_each_entry(pol, entry,
742                                              xfrm_policy_bydst[dir].table + i,
743                                              bydst) {
744                                 if (pol->type != type)
745                                         continue;
746                                 err = security_xfrm_policy_delete(pol);
747                                 if (err) {
748                                         xfrm_audit_policy_delete(pol, 0,
749                                                         audit_info->loginuid,
750                                                         audit_info->secid);
751                                         return err;
752                                 }
753                         }
754                 }
755         }
756         return err;
757 }
758 #else
759 static inline int
760 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
761 {
762         return 0;
763 }
764 #endif
765
766 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
767 {
768         int dir, err = 0;
769
770         write_lock_bh(&xfrm_policy_lock);
771
772         err = xfrm_policy_flush_secctx_check(type, audit_info);
773         if (err)
774                 goto out;
775
776         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
777                 struct xfrm_policy *pol;
778                 struct hlist_node *entry;
779                 int i, killed;
780
781                 killed = 0;
782         again1:
783                 hlist_for_each_entry(pol, entry,
784                                      &xfrm_policy_inexact[dir], bydst) {
785                         if (pol->type != type)
786                                 continue;
787                         hlist_del(&pol->bydst);
788                         hlist_del(&pol->byidx);
789                         write_unlock_bh(&xfrm_policy_lock);
790
791                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
792                                                  audit_info->secid);
793
794                         xfrm_policy_kill(pol);
795                         killed++;
796
797                         write_lock_bh(&xfrm_policy_lock);
798                         goto again1;
799                 }
800
801                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
802         again2:
803                         hlist_for_each_entry(pol, entry,
804                                              xfrm_policy_bydst[dir].table + i,
805                                              bydst) {
806                                 if (pol->type != type)
807                                         continue;
808                                 hlist_del(&pol->bydst);
809                                 hlist_del(&pol->byidx);
810                                 write_unlock_bh(&xfrm_policy_lock);
811
812                                 xfrm_audit_policy_delete(pol, 1,
813                                                          audit_info->loginuid,
814                                                          audit_info->secid);
815                                 xfrm_policy_kill(pol);
816                                 killed++;
817
818                                 write_lock_bh(&xfrm_policy_lock);
819                                 goto again2;
820                         }
821                 }
822
823                 xfrm_policy_count[dir] -= killed;
824         }
825         atomic_inc(&flow_cache_genid);
826 out:
827         write_unlock_bh(&xfrm_policy_lock);
828         return err;
829 }
830 EXPORT_SYMBOL(xfrm_policy_flush);
831
832 int xfrm_policy_walk(struct xfrm_policy_walk *walk,
833                      int (*func)(struct xfrm_policy *, int, int, void*),
834                      void *data)
835 {
836         struct xfrm_policy *old, *pol, *last = NULL;
837         int error = 0;
838
839         if (walk->type >= XFRM_POLICY_TYPE_MAX &&
840             walk->type != XFRM_POLICY_TYPE_ANY)
841                 return -EINVAL;
842
843         if (walk->policy == NULL && walk->count != 0)
844                 return 0;
845
846         old = pol = walk->policy;
847         walk->policy = NULL;
848         read_lock_bh(&xfrm_policy_lock);
849
850         for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) {
851                 if (walk->type != walk->cur_type &&
852                     walk->type != XFRM_POLICY_TYPE_ANY)
853                         continue;
854
855                 if (pol == NULL) {
856                         pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type],
857                                                struct xfrm_policy, bytype);
858                 }
859                 list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) {
860                         if (pol->dead)
861                                 continue;
862                         if (last) {
863                                 error = func(last, xfrm_policy_id2dir(last->index),
864                                              walk->count, data);
865                                 if (error) {
866                                         xfrm_pol_hold(last);
867                                         walk->policy = last;
868                                         goto out;
869                                 }
870                         }
871                         last = pol;
872                         walk->count++;
873                 }
874                 pol = NULL;
875         }
876         if (walk->count == 0) {
877                 error = -ENOENT;
878                 goto out;
879         }
880         if (last)
881                 error = func(last, xfrm_policy_id2dir(last->index), 0, data);
882 out:
883         read_unlock_bh(&xfrm_policy_lock);
884         if (old != NULL)
885                 xfrm_pol_put(old);
886         return error;
887 }
888 EXPORT_SYMBOL(xfrm_policy_walk);
889
890 /*
891  * Find policy to apply to this flow.
892  *
893  * Returns 0 if a policy is found, otherwise a negative errno.
894  */
895 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
896                              u8 type, u16 family, int dir)
897 {
898         struct xfrm_selector *sel = &pol->selector;
899         int match, ret = -ESRCH;
900
901         if (pol->family != family ||
902             pol->type != type)
903                 return ret;
904
905         match = xfrm_selector_match(sel, fl, family);
906         if (match)
907                 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
908
909         return ret;
910 }
911
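/* Find the best matching policy of the given type for a flow: search the
 * exact-address hash chain first, then let an inexact policy win only if
 * it has a better (numerically lower) priority.
 */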
912 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
913                                                      u16 family, u8 dir)
914 {
915         int err;
916         struct xfrm_policy *pol, *ret;
917         xfrm_address_t *daddr, *saddr;
918         struct hlist_node *entry;
919         struct hlist_head *chain;
920         u32 priority = ~0U;
921
922         daddr = xfrm_flowi_daddr(fl, family);
923         saddr = xfrm_flowi_saddr(fl, family);
924         if (unlikely(!daddr || !saddr))
925                 return NULL;
926
927         read_lock_bh(&xfrm_policy_lock);
928         chain = policy_hash_direct(daddr, saddr, family, dir);
929         ret = NULL;
930         hlist_for_each_entry(pol, entry, chain, bydst) {
931                 err = xfrm_policy_match(pol, fl, type, family, dir);
932                 if (err) {
933                         if (err == -ESRCH)
934                                 continue;
935                         else {
936                                 ret = ERR_PTR(err);
937                                 goto fail;
938                         }
939                 } else {
940                         ret = pol;
941                         priority = ret->priority;
942                         break;
943                 }
944         }
945         chain = &xfrm_policy_inexact[dir];
946         hlist_for_each_entry(pol, entry, chain, bydst) {
947                 err = xfrm_policy_match(pol, fl, type, family, dir);
948                 if (err) {
949                         if (err == -ESRCH)
950                                 continue;
951                         else {
952                                 ret = ERR_PTR(err);
953                                 goto fail;
954                         }
955                 } else if (pol->priority < priority) {
956                         ret = pol;
957                         break;
958                 }
959         }
960         if (ret)
961                 xfrm_pol_hold(ret);
962 fail:
963         read_unlock_bh(&xfrm_policy_lock);
964
965         return ret;
966 }
967
968 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
969                                void **objp, atomic_t **obj_refp)
970 {
971         struct xfrm_policy *pol;
972         int err = 0;
973
974 #ifdef CONFIG_XFRM_SUB_POLICY
975         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
976         if (IS_ERR(pol)) {
977                 err = PTR_ERR(pol);
978                 pol = NULL;
979         }
980         if (pol || err)
981                 goto end;
982 #endif
983         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
984         if (IS_ERR(pol)) {
985                 err = PTR_ERR(pol);
986                 pol = NULL;
987         }
988 #ifdef CONFIG_XFRM_SUB_POLICY
989 end:
990 #endif
991         if ((*objp = (void *) pol) != NULL)
992                 *obj_refp = &pol->refcnt;
993         return err;
994 }
995
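/* Map an XFRM policy direction to the corresponding flow cache direction. */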
996 static inline int policy_to_flow_dir(int dir)
997 {
998         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
999             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1000             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1001                 return dir;
1002         switch (dir) {
1003         default:
1004         case XFRM_POLICY_IN:
1005                 return FLOW_DIR_IN;
1006         case XFRM_POLICY_OUT:
1007                 return FLOW_DIR_OUT;
1008         case XFRM_POLICY_FWD:
1009                 return FLOW_DIR_FWD;
1010         }
1011 }
1012
1013 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
1014 {
1015         struct xfrm_policy *pol;
1016
1017         read_lock_bh(&xfrm_policy_lock);
1018         if ((pol = sk->sk_policy[dir]) != NULL) {
1019                 int match = xfrm_selector_match(&pol->selector, fl,
1020                                                 sk->sk_family);
1021                 int err = 0;
1022
1023                 if (match) {
1024                         err = security_xfrm_policy_lookup(pol, fl->secid,
1025                                         policy_to_flow_dir(dir));
1026                         if (!err)
1027                                 xfrm_pol_hold(pol);
1028                         else if (err == -ESRCH)
1029                                 pol = NULL;
1030                         else
1031                                 pol = ERR_PTR(err);
1032                 } else
1033                         pol = NULL;
1034         }
1035         read_unlock_bh(&xfrm_policy_lock);
1036         return pol;
1037 }
1038
1039 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1040 {
1041         struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1042                                                      pol->family, dir);
1043
1044         hlist_add_head(&pol->bydst, chain);
1045         hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1046         xfrm_policy_count[dir]++;
1047         xfrm_pol_hold(pol);
1048
1049         if (xfrm_bydst_should_resize(dir, NULL))
1050                 schedule_work(&xfrm_hash_work);
1051 }
1052
1053 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1054                                                 int dir)
1055 {
1056         if (hlist_unhashed(&pol->bydst))
1057                 return NULL;
1058
1059         hlist_del(&pol->bydst);
1060         hlist_del(&pol->byidx);
1061         xfrm_policy_count[dir]--;
1062
1063         return pol;
1064 }
1065
1066 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1067 {
1068         write_lock_bh(&xfrm_policy_lock);
1069         pol = __xfrm_policy_unlink(pol, dir);
1070         write_unlock_bh(&xfrm_policy_lock);
1071         if (pol) {
1072                 if (dir < XFRM_POLICY_MAX)
1073                         atomic_inc(&flow_cache_genid);
1074                 xfrm_policy_kill(pol);
1075                 return 0;
1076         }
1077         return -ENOENT;
1078 }
1079 EXPORT_SYMBOL(xfrm_policy_delete);
1080
1081 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1082 {
1083         struct xfrm_policy *old_pol;
1084
1085 #ifdef CONFIG_XFRM_SUB_POLICY
1086         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1087                 return -EINVAL;
1088 #endif
1089
1090         write_lock_bh(&xfrm_policy_lock);
1091         old_pol = sk->sk_policy[dir];
1092         sk->sk_policy[dir] = pol;
1093         if (pol) {
1094                 pol->curlft.add_time = get_seconds();
1095                 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1096                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1097         }
1098         if (old_pol)
1099                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1100         write_unlock_bh(&xfrm_policy_lock);
1101
1102         if (old_pol) {
1103                 xfrm_policy_kill(old_pol);
1104         }
1105         return 0;
1106 }
1107
1108 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1109 {
1110         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1111
1112         if (newp) {
1113                 newp->selector = old->selector;
1114                 if (security_xfrm_policy_clone(old, newp)) {
1115                         kfree(newp);
1116                         return NULL;  /* ENOMEM */
1117                 }
1118                 newp->lft = old->lft;
1119                 newp->curlft = old->curlft;
1120                 newp->action = old->action;
1121                 newp->flags = old->flags;
1122                 newp->xfrm_nr = old->xfrm_nr;
1123                 newp->index = old->index;
1124                 newp->type = old->type;
1125                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1126                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1127                 write_lock_bh(&xfrm_policy_lock);
1128                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1129                 write_unlock_bh(&xfrm_policy_lock);
1130                 xfrm_pol_put(newp);
1131         }
1132         return newp;
1133 }
1134
1135 int __xfrm_sk_clone_policy(struct sock *sk)
1136 {
1137         struct xfrm_policy *p0 = sk->sk_policy[0],
1138                            *p1 = sk->sk_policy[1];
1139
1140         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1141         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1142                 return -ENOMEM;
1143         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1144                 return -ENOMEM;
1145         return 0;
1146 }
1147
1148 static int
1149 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1150                unsigned short family)
1151 {
1152         int err;
1153         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1154
1155         if (unlikely(afinfo == NULL))
1156                 return -EINVAL;
1157         err = afinfo->get_saddr(local, remote);
1158         xfrm_policy_put_afinfo(afinfo);
1159         return err;
1160 }
1161
1162 /* Resolve list of templates for the flow, given policy. */
1163
1164 static int
1165 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1166                       struct xfrm_state **xfrm,
1167                       unsigned short family)
1168 {
1169         int nx;
1170         int i, error;
1171         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1172         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1173         xfrm_address_t tmp;
1174
1175         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1176                 struct xfrm_state *x;
1177                 xfrm_address_t *remote = daddr;
1178                 xfrm_address_t *local  = saddr;
1179                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1180
1181                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1182                     tmpl->mode == XFRM_MODE_BEET) {
1183                         remote = &tmpl->id.daddr;
1184                         local = &tmpl->saddr;
1185                         family = tmpl->encap_family;
1186                         if (xfrm_addr_any(local, family)) {
1187                                 error = xfrm_get_saddr(&tmp, remote, family);
1188                                 if (error)
1189                                         goto fail;
1190                                 local = &tmp;
1191                         }
1192                 }
1193
1194                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1195
1196                 if (x && x->km.state == XFRM_STATE_VALID) {
1197                         xfrm[nx++] = x;
1198                         daddr = remote;
1199                         saddr = local;
1200                         continue;
1201                 }
1202                 if (x) {
1203                         error = (x->km.state == XFRM_STATE_ERROR ?
1204                                  -EINVAL : -EAGAIN);
1205                         xfrm_state_put(x);
1206                 }
1207
1208                 if (!tmpl->optional)
1209                         goto fail;
1210         }
1211         return nx;
1212
1213 fail:
1214         for (nx--; nx>=0; nx--)
1215                 xfrm_state_put(xfrm[nx]);
1216         return error;
1217 }
1218
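/* Resolve the templates of every applicable policy for the flow; when more
 * than one policy is in use the collected states are sorted for outbound
 * processing. On error, any states already acquired are put back.
 */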
1219 static int
1220 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1221                   struct xfrm_state **xfrm,
1222                   unsigned short family)
1223 {
1224         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1225         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1226         int cnx = 0;
1227         int error;
1228         int ret;
1229         int i;
1230
1231         for (i = 0; i < npols; i++) {
1232                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1233                         error = -ENOBUFS;
1234                         goto fail;
1235                 }
1236
1237                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1238                 if (ret < 0) {
1239                         error = ret;
1240                         goto fail;
1241                 } else
1242                         cnx += ret;
1243         }
1244
1245         /* found states are sorted for outbound processing */
1246         if (npols > 1)
1247                 xfrm_state_sort(xfrm, tpp, cnx, family);
1248
1249         return cnx;
1250
1251  fail:
1252         for (cnx--; cnx>=0; cnx--)
1253                 xfrm_state_put(tpp[cnx]);
1254         return error;
1255
1256 }
1257
1258 /* Check that the bundle accepts the flow and its components are
1259  * still valid.
1260  */
1261
1262 static struct dst_entry *
1263 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1264 {
1265         struct dst_entry *x;
1266         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1267         if (unlikely(afinfo == NULL))
1268                 return ERR_PTR(-EINVAL);
1269         x = afinfo->find_bundle(fl, policy);
1270         xfrm_policy_put_afinfo(afinfo);
1271         return x;
1272 }
1273
1274 static inline int xfrm_get_tos(struct flowi *fl, int family)
1275 {
1276         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1277         int tos;
1278
1279         if (!afinfo)
1280                 return -EINVAL;
1281
1282         tos = afinfo->get_tos(fl);
1283
1284         xfrm_policy_put_afinfo(afinfo);
1285
1286         return tos;
1287 }
1288
1289 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1290 {
1291         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1292         struct xfrm_dst *xdst;
1293
1294         if (!afinfo)
1295                 return ERR_PTR(-EINVAL);
1296
1297         xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1298
1299         xfrm_policy_put_afinfo(afinfo);
1300
1301         return xdst;
1302 }
1303
1304 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1305                                  int nfheader_len)
1306 {
1307         struct xfrm_policy_afinfo *afinfo =
1308                 xfrm_policy_get_afinfo(dst->ops->family);
1309         int err;
1310
1311         if (!afinfo)
1312                 return -EINVAL;
1313
1314         err = afinfo->init_path(path, dst, nfheader_len);
1315
1316         xfrm_policy_put_afinfo(afinfo);
1317
1318         return err;
1319 }
1320
1321 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1322 {
1323         struct xfrm_policy_afinfo *afinfo =
1324                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1325         int err;
1326
1327         if (!afinfo)
1328                 return -EINVAL;
1329
1330         err = afinfo->fill_dst(xdst, dev);
1331
1332         xfrm_policy_put_afinfo(afinfo);
1333
1334         return err;
1335 }
1336
1337 /* Allocate a chain of dst_entry's, attach the known xfrm's, calculate
1338  * all the metrics... In short, bundle a bundle.
1339  */
1340
1341 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1342                                             struct xfrm_state **xfrm, int nx,
1343                                             struct flowi *fl,
1344                                             struct dst_entry *dst)
1345 {
1346         unsigned long now = jiffies;
1347         struct net_device *dev;
1348         struct dst_entry *dst_prev = NULL;
1349         struct dst_entry *dst0 = NULL;
1350         int i = 0;
1351         int err;
1352         int header_len = 0;
1353         int nfheader_len = 0;
1354         int trailer_len = 0;
1355         int tos;
1356         int family = policy->selector.family;
1357
1358         tos = xfrm_get_tos(fl, family);
1359         err = tos;
1360         if (tos < 0)
1361                 goto put_states;
1362
1363         dst_hold(dst);
1364
1365         for (; i < nx; i++) {
1366                 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1367                 struct dst_entry *dst1 = &xdst->u.dst;
1368
1369                 err = PTR_ERR(xdst);
1370                 if (IS_ERR(xdst)) {
1371                         dst_release(dst);
1372                         goto put_states;
1373                 }
1374
1375                 if (!dst_prev)
1376                         dst0 = dst1;
1377                 else {
1378                         dst_prev->child = dst_clone(dst1);
1379                         dst1->flags |= DST_NOHASH;
1380                 }
1381
1382                 xdst->route = dst;
1383                 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1384
1385                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1386                         family = xfrm[i]->props.family;
1387                         dst = xfrm_dst_lookup(xfrm[i], tos, family);
1388                         err = PTR_ERR(dst);
1389                         if (IS_ERR(dst))
1390                                 goto put_states;
1391                 } else
1392                         dst_hold(dst);
1393
1394                 dst1->xfrm = xfrm[i];
1395                 xdst->genid = xfrm[i]->genid;
1396
1397                 dst1->obsolete = -1;
1398                 dst1->flags |= DST_HOST;
1399                 dst1->lastuse = now;
1400
1401                 dst1->input = dst_discard;
1402                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1403
1404                 dst1->next = dst_prev;
1405                 dst_prev = dst1;
1406
1407                 header_len += xfrm[i]->props.header_len;
1408                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1409                         nfheader_len += xfrm[i]->props.header_len;
1410                 trailer_len += xfrm[i]->props.trailer_len;
1411         }
1412
1413         dst_prev->child = dst;
1414         dst0->path = dst;
1415
1416         err = -ENODEV;
1417         dev = dst->dev;
1418         if (!dev)
1419                 goto free_dst;
1420
1421         /* Copy neighbour for reachability confirmation */
1422         dst0->neighbour = neigh_clone(dst->neighbour);
1423
1424         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1425         xfrm_init_pmtu(dst_prev);
1426
1427         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1428                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1429
1430                 err = xfrm_fill_dst(xdst, dev);
1431                 if (err)
1432                         goto free_dst;
1433
1434                 dst_prev->header_len = header_len;
1435                 dst_prev->trailer_len = trailer_len;
1436                 header_len -= xdst->u.dst.xfrm->props.header_len;
1437                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1438         }
1439
1440 out:
1441         return dst0;
1442
1443 put_states:
1444         for (; i < nx; i++)
1445                 xfrm_state_put(xfrm[i]);
1446 free_dst:
1447         if (dst0)
1448                 dst_free(dst0);
1449         dst0 = ERR_PTR(err);
1450         goto out;
1451 }
1452
1453 static inline int
1454 xfrm_dst_alloc_copy(void **target, void *src, int size)
1455 {
1456         if (!*target) {
1457                 *target = kmalloc(size, GFP_ATOMIC);
1458                 if (!*target)
1459                         return -ENOMEM;
1460         }
1461         memcpy(*target, src, size);
1462         return 0;
1463 }
1464
1465 static inline int
1466 xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
1467 {
1468 #ifdef CONFIG_XFRM_SUB_POLICY
1469         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1470         return xfrm_dst_alloc_copy((void **)&(xdst->partner),
1471                                    sel, sizeof(*sel));
1472 #else
1473         return 0;
1474 #endif
1475 }
1476
1477 static inline int
1478 xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1479 {
1480 #ifdef CONFIG_XFRM_SUB_POLICY
1481         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1482         return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
1483 #else
1484         return 0;
1485 #endif
1486 }
1487
1488 static int stale_bundle(struct dst_entry *dst);
1489
1490 /* Main function: finds/creates a bundle for given flow.
1491  *
1492  * At the moment we eat a raw IP route. Mostly to speed up lookups
1493  * on interfaces with disabled IPsec.
1494  */
1495 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1496                   struct sock *sk, int flags)
1497 {
1498         struct xfrm_policy *policy;
1499         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1500         int npols;
1501         int pol_dead;
1502         int xfrm_nr;
1503         int pi;
1504         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1505         struct dst_entry *dst, *dst_orig = *dst_p;
1506         int nx = 0;
1507         int err;
1508         u32 genid;
1509         u16 family;
1510         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1511
1512 restart:
1513         genid = atomic_read(&flow_cache_genid);
1514         policy = NULL;
1515         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1516                 pols[pi] = NULL;
1517         npols = 0;
1518         pol_dead = 0;
1519         xfrm_nr = 0;
1520
1521         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1522                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1523                 err = PTR_ERR(policy);
1524                 if (IS_ERR(policy)) {
1525                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1526                         goto dropdst;
1527                 }
1528         }
1529
1530         if (!policy) {
1531                 /* To accelerate a bit...  */
1532                 if ((dst_orig->flags & DST_NOXFRM) ||
1533                     !xfrm_policy_count[XFRM_POLICY_OUT])
1534                         goto nopol;
1535
1536                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1537                                            dir, xfrm_policy_lookup);
1538                 err = PTR_ERR(policy);
1539                 if (IS_ERR(policy)) {
1540                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1541                         goto dropdst;
1542                 }
1543         }
1544
1545         if (!policy)
1546                 goto nopol;
1547
1548         family = dst_orig->ops->family;
1549         pols[0] = policy;
1550         npols ++;
1551         xfrm_nr += pols[0]->xfrm_nr;
1552
1553         err = -ENOENT;
1554         if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1555                 goto error;
1556
1557         policy->curlft.use_time = get_seconds();
1558
1559         switch (policy->action) {
1560         default:
1561         case XFRM_POLICY_BLOCK:
1562                 /* Prohibit the flow */
1563                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1564                 err = -EPERM;
1565                 goto error;
1566
1567         case XFRM_POLICY_ALLOW:
1568 #ifndef CONFIG_XFRM_SUB_POLICY
1569                 if (policy->xfrm_nr == 0) {
1570                         /* Flow passes not transformed. */
1571                         xfrm_pol_put(policy);
1572                         return 0;
1573                 }
1574 #endif
1575
1576                 /* Try to find a matching bundle.
1577                  *
1578                  * LATER: get help from the flow cache.  It is optional;
1579                  * this is required only for output policy.
1580                  */
1581                 dst = xfrm_find_bundle(fl, policy, family);
1582                 if (IS_ERR(dst)) {
1583                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1584                         err = PTR_ERR(dst);
1585                         goto error;
1586                 }
1587
1588                 if (dst)
1589                         break;
1590
1591 #ifdef CONFIG_XFRM_SUB_POLICY
1592                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1593                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1594                                                             fl, family,
1595                                                             XFRM_POLICY_OUT);
1596                         if (pols[1]) {
1597                                 if (IS_ERR(pols[1])) {
1598                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1599                                         err = PTR_ERR(pols[1]);
1600                                         goto error;
1601                                 }
1602                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1603                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1604                                         err = -EPERM;
1605                                         goto error;
1606                                 }
1607                                 npols++;
1608                                 xfrm_nr += pols[1]->xfrm_nr;
1609                         }
1610                 }
1611
1612                 /*
1613                  * Neither the flowi nor the bundle information knows the
1614                  * transformation template size, so with more than one policy
1615                  * in use we can only tell whether all of them are bypass
1616                  * after they have been searched.  Note that the not-transformed
1617                  * bypass above is likewise guarded by the non-sub-policy configuration.
1618                  */
1619                 if (xfrm_nr == 0) {
1620                         /* Flow passes untransformed. */
1621                         xfrm_pols_put(pols, npols);
1622                         return 0;
1623                 }
1624
1625 #endif
1626                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1627
1628                 if (unlikely(nx < 0)) {
1629                         err = nx;
1630                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1631                                 /* EREMOTE tells the caller to generate
1632                                  * a one-shot blackhole route.
1633                                  */
1634                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1635                                 xfrm_pol_put(policy);
1636                                 return -EREMOTE;
1637                         }
1638                         if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1639                                 DECLARE_WAITQUEUE(wait, current);
1640
1641                                 add_wait_queue(&km_waitq, &wait);
1642                                 set_current_state(TASK_INTERRUPTIBLE);
1643                                 schedule();
1644                                 set_current_state(TASK_RUNNING);
1645                                 remove_wait_queue(&km_waitq, &wait);
1646
1647                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1648
1649                                 if (nx == -EAGAIN && signal_pending(current)) {
1650                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1651                                         err = -ERESTART;
1652                                         goto error;
1653                                 }
1654                                 if (nx == -EAGAIN ||
1655                                     genid != atomic_read(&flow_cache_genid)) {
1656                                         xfrm_pols_put(pols, npols);
1657                                         goto restart;
1658                                 }
1659                                 err = nx;
1660                         }
1661                         if (err < 0) {
1662                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1663                                 goto error;
1664                         }
1665                 }
1666                 if (nx == 0) {
1667                         /* Flow passes untransformed. */
1668                         xfrm_pols_put(pols, npols);
1669                         return 0;
1670                 }
1671
1672                 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1673                 err = PTR_ERR(dst);
1674                 if (IS_ERR(dst)) {
1675                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1676                         goto error;
1677                 }
1678
1679                 for (pi = 0; pi < npols; pi++) {
1680                         read_lock_bh(&pols[pi]->lock);
1681                         pol_dead |= pols[pi]->dead;
1682                         read_unlock_bh(&pols[pi]->lock);
1683                 }
1684
1685                 write_lock_bh(&policy->lock);
1686                 if (unlikely(pol_dead || stale_bundle(dst))) {
1687                         /* Wow! While we worked on resolving, this
1688                          * policy has gone away.  Retry.  It is not paranoia:
1689                          * we simply cannot enlist a new bundle on a dead object,
1690                          * and we cannot enlist stale bundles either.
1691                          */
1692                         write_unlock_bh(&policy->lock);
1693                         if (dst)
1694                                 dst_free(dst);
1695
1696                         if (pol_dead)
1697                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1698                         else
1699                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1700                         err = -EHOSTUNREACH;
1701                         goto error;
1702                 }
1703
1704                 if (npols > 1)
1705                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1706                 else
1707                         err = xfrm_dst_update_origin(dst, fl);
1708                 if (unlikely(err)) {
1709                         write_unlock_bh(&policy->lock);
1710                         if (dst)
1711                                 dst_free(dst);
1712                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1713                         goto error;
1714                 }
1715
1716                 dst->next = policy->bundles;
1717                 policy->bundles = dst;
1718                 dst_hold(dst);
1719                 write_unlock_bh(&policy->lock);
1720         }
1721         *dst_p = dst;
1722         dst_release(dst_orig);
1723         xfrm_pols_put(pols, npols);
1724         return 0;
1725
1726 error:
1727         xfrm_pols_put(pols, npols);
1728 dropdst:
1729         dst_release(dst_orig);
1730         *dst_p = NULL;
1731         return err;
1732
1733 nopol:
1734         err = -ENOENT;
1735         if (flags & XFRM_LOOKUP_ICMP)
1736                 goto dropdst;
1737         return 0;
1738 }
1739 EXPORT_SYMBOL(__xfrm_lookup);
1740
1741 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1742                 struct sock *sk, int flags)
1743 {
1744         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1745
1746         if (err == -EREMOTE) {
1747                 dst_release(*dst_p);
1748                 *dst_p = NULL;
1749                 err = -EAGAIN;
1750         }
1751
1752         return err;
1753 }
1754 EXPORT_SYMBOL(xfrm_lookup);
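
/*
 * Caller sketch (hypothetical, for illustration only): a caller that wants
 * the one-shot blackhole behaviour for larval states uses __xfrm_lookup()
 * and handles -EREMOTE itself (the original route is still in *dst_p at
 * that point), while plain xfrm_lookup() folds that case into -EAGAIN.
 * make_blackhole() is a placeholder name, not a function defined here:
 *
 *	err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
 *	if (err == -EREMOTE)
 *		dst = make_blackhole(dst);
 *	else if (err)
 *		goto drop;
 */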
1755
1756 static inline int
1757 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1758 {
1759         struct xfrm_state *x;
1760
1761         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1762                 return 0;
1763         x = skb->sp->xvec[idx];
1764         if (!x->type->reject)
1765                 return 0;
1766         return x->type->reject(x, skb, fl);
1767 }
1768
1769 /* When the skb is transformed back to its "native" form, we have to
1770  * check policy restrictions.  At the moment we do this in a maximally
1771  * stupid way.  Shame on me. :-)  Of course, connected sockets must
1772  * have the policy cached at them.
1773  */
1774
1775 static inline int
1776 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1777               unsigned short family)
1778 {
1779         if (xfrm_state_kern(x))
1780                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1781         return  x->id.proto == tmpl->id.proto &&
1782                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1783                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1784                 x->props.mode == tmpl->mode &&
1785                 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1786                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1787                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1788                   xfrm_state_addr_cmp(tmpl, x, family));
1789 }
1790
1791 /*
1792  * 0 or a positive value is returned when validation succeeds: either the
1793  * bypass index for an optional transport-mode template, or the index
1794  * following the secpath state that matched the template.
1795  * -1 is returned when no matching template is found.
1796  * Otherwise "-2 - errored_index" is returned.
1797  */
1798 static inline int
1799 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1800                unsigned short family)
1801 {
1802         int idx = start;
1803
1804         if (tmpl->optional) {
1805                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1806                         return start;
1807         } else
1808                 start = -1;
1809         for (; idx < sp->len; idx++) {
1810                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1811                         return ++idx;
1812                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1813                         if (start == -1)
1814                                 start = -2-idx;
1815                         break;
1816                 }
1817         }
1818         return start;
1819 }
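
/*
 * Worked example (illustrative only, not from the original source): with a
 * non-optional tunnel-mode template, sp->len == 2, sp->xvec[0] a
 * transport-mode state that does not match, and sp->xvec[1] a tunnel-mode
 * state that does match, xfrm_policy_ok(tmpl, sp, 0, family) skips index 0
 * (a transport-mode state does not terminate the scan) and returns 2, the
 * index just past the matching state.  If sp->xvec[0] were instead a
 * non-matching tunnel-mode state, the scan would stop immediately and
 * return -2 - 0 == -2.
 */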
1820
1821 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1822                           unsigned int family, int reverse)
1823 {
1824         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1825         int err;
1826
1827         if (unlikely(afinfo == NULL))
1828                 return -EAFNOSUPPORT;
1829
1830         afinfo->decode_session(skb, fl, reverse);
1831         err = security_xfrm_decode_session(skb, &fl->secid);
1832         xfrm_policy_put_afinfo(afinfo);
1833         return err;
1834 }
1835 EXPORT_SYMBOL(__xfrm_decode_session);
1836
1837 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1838 {
1839         for (; k < sp->len; k++) {
1840                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1841                         *idxp = k;
1842                         return 1;
1843                 }
1844         }
1845
1846         return 0;
1847 }
1848
1849 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1850                         unsigned short family)
1851 {
1852         struct xfrm_policy *pol;
1853         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1854         int npols = 0;
1855         int xfrm_nr;
1856         int pi;
1857         int reverse;
1858         struct flowi fl;
1859         u8 fl_dir;
1860         int xerr_idx = -1;
1861
1862         reverse = dir & ~XFRM_POLICY_MASK;
1863         dir &= XFRM_POLICY_MASK;
1864         fl_dir = policy_to_flow_dir(dir);
1865
1866         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1867                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1868                 return 0;
1869         }
1870
1871         nf_nat_decode_session(skb, &fl, family);
1872
1873         /* First, check the used SAs against their selectors. */
1874         if (skb->sp) {
1875                 int i;
1876
1877                 for (i = skb->sp->len - 1; i >= 0; i--) {
1878                         struct xfrm_state *x = skb->sp->xvec[i];
1879                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
1880                                 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1881                                 return 0;
1882                         }
1883                 }
1884         }
1885
1886         pol = NULL;
1887         if (sk && sk->sk_policy[dir]) {
1888                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1889                 if (IS_ERR(pol)) {
1890                         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1891                         return 0;
1892                 }
1893         }
1894
1895         if (!pol)
1896                 pol = flow_cache_lookup(&fl, family, fl_dir,
1897                                         xfrm_policy_lookup);
1898
1899         if (IS_ERR(pol)) {
1900                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1901                 return 0;
1902         }
1903
1904         if (!pol) {
1905                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1906                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1907                         XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1908                         return 0;
1909                 }
1910                 return 1;
1911         }
1912
1913         pol->curlft.use_time = get_seconds();
1914
1915         pols[0] = pol;
1916         npols++;
1917 #ifdef CONFIG_XFRM_SUB_POLICY
1918         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1919                 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1920                                                     &fl, family,
1921                                                     XFRM_POLICY_IN);
1922                 if (pols[1]) {
1923                         if (IS_ERR(pols[1])) {
1924                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1925                                 return 0;
1926                         }
1927                         pols[1]->curlft.use_time = get_seconds();
1928                         npols++;
1929                 }
1930         }
1931 #endif
1932
1933         if (pol->action == XFRM_POLICY_ALLOW) {
1934                 struct sec_path *sp;
1935                 static struct sec_path dummy;
1936                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1937                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1938                 struct xfrm_tmpl **tpp = tp;
1939                 int ti = 0;
1940                 int i, k;
1941
1942                 if ((sp = skb->sp) == NULL)
1943                         sp = &dummy;
1944
1945                 for (pi = 0; pi < npols; pi++) {
1946                         if (pols[pi] != pol &&
1947                             pols[pi]->action != XFRM_POLICY_ALLOW) {
1948                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1949                                 goto reject;
1950                         }
1951                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1952                                 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1953                                 goto reject_error;
1954                         }
1955                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1956                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1957                 }
1958                 xfrm_nr = ti;
1959                 if (npols > 1) {
1960                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1961                         tpp = stp;
1962                 }
1963
1964                 /* For each tunnel xfrm, find the first matching tmpl.
1965                  * For each tmpl before that, find the corresponding xfrm.
1966                  * Order is _important_.  Later we will implement
1967                  * explicit barriers, but at the moment a barrier is
1968                  * implied between every two transformations.
1969                  */
1970                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1971                         k = xfrm_policy_ok(tpp[i], sp, k, family);
1972                         if (k < 0) {
1973                                 if (k < -1)
1974                                         /* "-2 - errored_index" returned */
1975                                         xerr_idx = -(2+k);
1976                                 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1977                                 goto reject;
1978                         }
1979                 }
1980
1981                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1982                         XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1983                         goto reject;
1984                 }
1985
1986                 xfrm_pols_put(pols, npols);
1987                 return 1;
1988         }
1989         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1990
1991 reject:
1992         xfrm_secpath_reject(xerr_idx, skb, &fl);
1993 reject_error:
1994         xfrm_pols_put(pols, npols);
1995         return 0;
1996 }
1997 EXPORT_SYMBOL(__xfrm_policy_check);
1998
1999 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2000 {
2001         struct flowi fl;
2002
2003         if (xfrm_decode_session(skb, &fl, family) < 0) {
2004                 /* XXX: we should have something like FWDHDRERROR here. */
2005                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
2006                 return 0;
2007         }
2008
2009         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
2010 }
2011 EXPORT_SYMBOL(__xfrm_route_forward);
2012
2013 /* Optimize later using cookies and generation ids. */
2014
2015 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2016 {
2017         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2018          * to "-1" to force all XFRM destinations to get validated by
2019          * dst_ops->check on every use.  We do this because when a
2020          * normal route referenced by an XFRM dst is obsoleted we do
2021          * not go looking for all the parent XFRM dsts that reference it
2022          * in order to invalidate them.  It is just too much work.
2023          * Instead we make the checks here on every use.  For example:
2024          *
2025          *      XFRM dst A --> IPv4 dst X
2026          *
2027          * X is the "xdst->route" of A (X is also the "dst->path" of A
2028          * in this example).  If X is marked obsolete, "A" will not
2029          * notice.  That's what we are validating here via the
2030          * stale_bundle() check.
2031          *
2032          * When a policy's bundle is pruned, we dst_free() the XFRM
2033          * dst, which causes its ->obsolete field to be set to a
2034          * positive non-zero integer.  If an XFRM dst has been pruned
2035          * like this, we want to force a new route lookup.
2036          */
2037         if (dst->obsolete < 0 && !stale_bundle(dst))
2038                 return dst;
2039
2040         return NULL;
2041 }
2042
2043 static int stale_bundle(struct dst_entry *dst)
2044 {
2045         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2046 }
2047
2048 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2049 {
2050         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2051                 dst->dev = dev->nd_net->loopback_dev;
2052                 dev_hold(dst->dev);
2053                 dev_put(dev);
2054         }
2055 }
2056 EXPORT_SYMBOL(xfrm_dst_ifdown);
2057
2058 static void xfrm_link_failure(struct sk_buff *skb)
2059 {
2060         /* Impossible. Such a dst must be popped before it reaches the point of failure. */
2061         return;
2062 }
2063
2064 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2065 {
2066         if (dst) {
2067                 if (dst->obsolete) {
2068                         dst_release(dst);
2069                         dst = NULL;
2070                 }
2071         }
2072         return dst;
2073 }
2074
2075 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2076 {
2077         struct dst_entry *dst, **dstp;
2078
2079         write_lock(&pol->lock);
2080         dstp = &pol->bundles;
2081         while ((dst = *dstp) != NULL) {
2082                 if (func(dst)) {
2083                         *dstp = dst->next;
2084                         dst->next = *gc_list_p;
2085                         *gc_list_p = dst;
2086                 } else {
2087                         dstp = &dst->next;
2088                 }
2089         }
2090         write_unlock(&pol->lock);
2091 }
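
/*
 * Unlinking sketch (illustrative only): dstp starts at &pol->bundles, so
 * for a bundle list A -> B -> C where func() matches only B, the loop
 * leaves A -> C hanging off the policy and pushes B onto *gc_list_p; the
 * list head itself is only rewritten when the very first entry is pruned.
 */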
2092
2093 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2094 {
2095         struct dst_entry *gc_list = NULL;
2096         int dir;
2097
2098         read_lock_bh(&xfrm_policy_lock);
2099         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2100                 struct xfrm_policy *pol;
2101                 struct hlist_node *entry;
2102                 struct hlist_head *table;
2103                 int i;
2104
2105                 hlist_for_each_entry(pol, entry,
2106                                      &xfrm_policy_inexact[dir], bydst)
2107                         prune_one_bundle(pol, func, &gc_list);
2108
2109                 table = xfrm_policy_bydst[dir].table;
2110                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2111                         hlist_for_each_entry(pol, entry, table + i, bydst)
2112                                 prune_one_bundle(pol, func, &gc_list);
2113                 }
2114         }
2115         read_unlock_bh(&xfrm_policy_lock);
2116
2117         while (gc_list) {
2118                 struct dst_entry *dst = gc_list;
2119                 gc_list = dst->next;
2120                 dst_free(dst);
2121         }
2122 }
2123
2124 static int unused_bundle(struct dst_entry *dst)
2125 {
2126         return !atomic_read(&dst->__refcnt);
2127 }
2128
2129 static void __xfrm_garbage_collect(void)
2130 {
2131         xfrm_prune_bundles(unused_bundle);
2132 }
2133
2134 static int xfrm_flush_bundles(void)
2135 {
2136         xfrm_prune_bundles(stale_bundle);
2137         return 0;
2138 }
2139
2140 static void xfrm_init_pmtu(struct dst_entry *dst)
2141 {
2142         do {
2143                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2144                 u32 pmtu, route_mtu_cached;
2145
2146                 pmtu = dst_mtu(dst->child);
2147                 xdst->child_mtu_cached = pmtu;
2148
2149                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2150
2151                 route_mtu_cached = dst_mtu(xdst->route);
2152                 xdst->route_mtu_cached = route_mtu_cached;
2153
2154                 if (pmtu > route_mtu_cached)
2155                         pmtu = route_mtu_cached;
2156
2157                 dst->metrics[RTAX_MTU-1] = pmtu;
2158         } while ((dst = dst->next));
2159 }
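
/*
 * Worked example (illustrative numbers only): if the child route reports an
 * MTU of 1500 and xfrm_state_mtu() subtracts, say, 56 bytes of ESP overhead
 * for this state, the candidate PMTU is 1444; if the underlying route's MTU
 * is only 1400, the smaller value 1400 is what ends up in
 * dst->metrics[RTAX_MTU-1].
 */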
2160
2161 /* Check that the bundle accepts the flow and that its components are
2162  * still valid.
2163  */
2164
2165 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2166                 struct flowi *fl, int family, int strict)
2167 {
2168         struct dst_entry *dst = &first->u.dst;
2169         struct xfrm_dst *last;
2170         u32 mtu;
2171
2172         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2173             (dst->dev && !netif_running(dst->dev)))
2174                 return 0;
2175 #ifdef CONFIG_XFRM_SUB_POLICY
2176         if (fl) {
2177                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2178                         return 0;
2179                 if (first->partner &&
2180                     !xfrm_selector_match(first->partner, fl, family))
2181                         return 0;
2182         }
2183 #endif
2184
2185         last = NULL;
2186
2187         do {
2188                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2189
2190                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2191                         return 0;
2192                 if (fl && pol &&
2193                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2194                         return 0;
2195                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2196                         return 0;
2197                 if (xdst->genid != dst->xfrm->genid)
2198                         return 0;
2199
2200                 if (strict && fl &&
2201                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2202                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2203                         return 0;
2204
2205                 mtu = dst_mtu(dst->child);
2206                 if (xdst->child_mtu_cached != mtu) {
2207                         last = xdst;
2208                         xdst->child_mtu_cached = mtu;
2209                 }
2210
2211                 if (!dst_check(xdst->route, xdst->route_cookie))
2212                         return 0;
2213                 mtu = dst_mtu(xdst->route);
2214                 if (xdst->route_mtu_cached != mtu) {
2215                         last = xdst;
2216                         xdst->route_mtu_cached = mtu;
2217                 }
2218
2219                 dst = dst->child;
2220         } while (dst->xfrm);
2221
2222         if (likely(!last))
2223                 return 1;
2224
2225         mtu = last->child_mtu_cached;
2226         for (;;) {
2227                 dst = &last->u.dst;
2228
2229                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2230                 if (mtu > last->route_mtu_cached)
2231                         mtu = last->route_mtu_cached;
2232                 dst->metrics[RTAX_MTU-1] = mtu;
2233
2234                 if (last == first)
2235                         break;
2236
2237                 last = (struct xfrm_dst *)last->u.dst.next;
2238                 last->child_mtu_cached = mtu;
2239         }
2240
2241         return 1;
2242 }
2243
2244 EXPORT_SYMBOL(xfrm_bundle_ok);
2245
2246 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2247 {
2248         int err = 0;
2249         if (unlikely(afinfo == NULL))
2250                 return -EINVAL;
2251         if (unlikely(afinfo->family >= NPROTO))
2252                 return -EAFNOSUPPORT;
2253         write_lock_bh(&xfrm_policy_afinfo_lock);
2254         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2255                 err = -ENOBUFS;
2256         else {
2257                 struct dst_ops *dst_ops = afinfo->dst_ops;
2258                 if (likely(dst_ops->kmem_cachep == NULL))
2259                         dst_ops->kmem_cachep = xfrm_dst_cache;
2260                 if (likely(dst_ops->check == NULL))
2261                         dst_ops->check = xfrm_dst_check;
2262                 if (likely(dst_ops->negative_advice == NULL))
2263                         dst_ops->negative_advice = xfrm_negative_advice;
2264                 if (likely(dst_ops->link_failure == NULL))
2265                         dst_ops->link_failure = xfrm_link_failure;
2266                 if (likely(afinfo->garbage_collect == NULL))
2267                         afinfo->garbage_collect = __xfrm_garbage_collect;
2268                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2269         }
2270         write_unlock_bh(&xfrm_policy_afinfo_lock);
2271         return err;
2272 }
2273 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
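
/*
 * Registration sketch (hypothetical, for illustration only): an
 * address-family module fills in the fields it provides and lets
 * xfrm_policy_register_afinfo() install the shared defaults (dst cache,
 * ->check, ->negative_advice, ->link_failure, garbage collection).  The
 * identifiers below are placeholders, not symbols defined in this file:
 *
 *	static struct xfrm_policy_afinfo example_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &example_dst_ops,
 *		.decode_session	= example_decode_session,
 *	};
 *
 *	err = xfrm_policy_register_afinfo(&example_afinfo);
 */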
2274
2275 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2276 {
2277         int err = 0;
2278         if (unlikely(afinfo == NULL))
2279                 return -EINVAL;
2280         if (unlikely(afinfo->family >= NPROTO))
2281                 return -EAFNOSUPPORT;
2282         write_lock_bh(&xfrm_policy_afinfo_lock);
2283         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2284                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2285                         err = -EINVAL;
2286                 else {
2287                         struct dst_ops *dst_ops = afinfo->dst_ops;
2288                         xfrm_policy_afinfo[afinfo->family] = NULL;
2289                         dst_ops->kmem_cachep = NULL;
2290                         dst_ops->check = NULL;
2291                         dst_ops->negative_advice = NULL;
2292                         dst_ops->link_failure = NULL;
2293                         afinfo->garbage_collect = NULL;
2294                 }
2295         }
2296         write_unlock_bh(&xfrm_policy_afinfo_lock);
2297         return err;
2298 }
2299 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2300
2301 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2302 {
2303         struct xfrm_policy_afinfo *afinfo;
2304         if (unlikely(family >= NPROTO))
2305                 return NULL;
2306         read_lock(&xfrm_policy_afinfo_lock);
2307         afinfo = xfrm_policy_afinfo[family];
2308         if (unlikely(!afinfo))
2309                 read_unlock(&xfrm_policy_afinfo_lock);
2310         return afinfo;
2311 }
2312
2313 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2314 {
2315         read_unlock(&xfrm_policy_afinfo_lock);
2316 }
2317
2318 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2319 {
2320         struct net_device *dev = ptr;
2321
2322         if (dev->nd_net != &init_net)
2323                 return NOTIFY_DONE;
2324
2325         switch (event) {
2326         case NETDEV_DOWN:
2327                 xfrm_flush_bundles();
2328         }
2329         return NOTIFY_DONE;
2330 }
2331
2332 static struct notifier_block xfrm_dev_notifier = {
2333         .notifier_call  = xfrm_dev_event,
2334 };
2337
2338 #ifdef CONFIG_XFRM_STATISTICS
2339 static int __init xfrm_statistics_init(void)
2340 {
2341         if (snmp_mib_init((void **)xfrm_statistics,
2342                           sizeof(struct linux_xfrm_mib)) < 0)
2343                 return -ENOMEM;
2344         return 0;
2345 }
2346 #endif
2347
2348 static void __init xfrm_policy_init(void)
2349 {
2350         unsigned int hmask, sz;
2351         int dir;
2352
2353         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2354                                            sizeof(struct xfrm_dst),
2355                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2356                                            NULL);
2357
2358         hmask = 8 - 1;
2359         sz = (hmask+1) * sizeof(struct hlist_head);
2360
2361         xfrm_policy_byidx = xfrm_hash_alloc(sz);
2362         xfrm_idx_hmask = hmask;
2363         if (!xfrm_policy_byidx)
2364                 panic("XFRM: failed to allocate byidx hash\n");
2365
2366         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2367                 struct xfrm_policy_hash *htab;
2368
2369                 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2370
2371                 htab = &xfrm_policy_bydst[dir];
2372                 htab->table = xfrm_hash_alloc(sz);
2373                 htab->hmask = hmask;
2374                 if (!htab->table)
2375                         panic("XFRM: failed to allocate bydst hash\n");
2376         }
2377
2378         for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++)
2379                 INIT_LIST_HEAD(&xfrm_policy_bytype[dir]);
2380
2381         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2382         register_netdevice_notifier(&xfrm_dev_notifier);
2383 }
2384
2385 void __init xfrm_init(void)
2386 {
2387 #ifdef CONFIG_XFRM_STATISTICS
2388         xfrm_statistics_init();
2389 #endif
2390         xfrm_state_init();
2391         xfrm_policy_init();
2392         xfrm_input_init();
2393 #ifdef CONFIG_XFRM_STATISTICS
2394         xfrm_proc_init();
2395 #endif
2396 }
2397
2398 #ifdef CONFIG_AUDITSYSCALL
2399 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2400                                          struct audit_buffer *audit_buf)
2401 {
2402         struct xfrm_sec_ctx *ctx = xp->security;
2403         struct xfrm_selector *sel = &xp->selector;
2404
2405         if (ctx)
2406                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2407                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2408
2409         switch (sel->family) {
2410         case AF_INET:
2411                 audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2412                                  NIPQUAD(sel->saddr.a4));
2413                 if (sel->prefixlen_s != 32)
2414                         audit_log_format(audit_buf, " src_prefixlen=%d",
2415                                          sel->prefixlen_s);
2416                 audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2417                                  NIPQUAD(sel->daddr.a4));
2418                 if (sel->prefixlen_d != 32)
2419                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2420                                          sel->prefixlen_d);
2421                 break;
2422         case AF_INET6:
2423                 audit_log_format(audit_buf, " src=" NIP6_FMT,
2424                                  NIP6(*(struct in6_addr *)sel->saddr.a6));
2425                 if (sel->prefixlen_s != 128)
2426                         audit_log_format(audit_buf, " src_prefixlen=%d",
2427                                          sel->prefixlen_s);
2428                 audit_log_format(audit_buf, " dst=" NIP6_FMT,
2429                                  NIP6(*(struct in6_addr *)sel->daddr.a6));
2430                 if (sel->prefixlen_d != 128)
2431                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2432                                          sel->prefixlen_d);
2433                 break;
2434         }
2435 }
2436
2437 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2438                            u32 auid, u32 secid)
2439 {
2440         struct audit_buffer *audit_buf;
2441
2442         audit_buf = xfrm_audit_start("SPD-add");
2443         if (audit_buf == NULL)
2444                 return;
2445         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2446         audit_log_format(audit_buf, " res=%u", result);
2447         xfrm_audit_common_policyinfo(xp, audit_buf);
2448         audit_log_end(audit_buf);
2449 }
2450 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2451
2452 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2453                               u32 auid, u32 secid)
2454 {
2455         struct audit_buffer *audit_buf;
2456
2457         audit_buf = xfrm_audit_start("SPD-delete");
2458         if (audit_buf == NULL)
2459                 return;
2460         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2461         audit_log_format(audit_buf, " res=%u", result);
2462         xfrm_audit_common_policyinfo(xp, audit_buf);
2463         audit_log_end(audit_buf);
2464 }
2465 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2466 #endif
2467
2468 #ifdef CONFIG_XFRM_MIGRATE
2469 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2470                                        struct xfrm_selector *sel_tgt)
2471 {
2472         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2473                 if (sel_tgt->family == sel_cmp->family &&
2474                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2475                                   sel_cmp->family) == 0 &&
2476                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2477                                   sel_cmp->family) == 0 &&
2478                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2479                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2480                         return 1;
2481                 }
2482         } else {
2483                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2484                         return 1;
2485                 }
2486         }
2487         return 0;
2488 }
2489
2490 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2491                                                      u8 dir, u8 type)
2492 {
2493         struct xfrm_policy *pol, *ret = NULL;
2494         struct hlist_node *entry;
2495         struct hlist_head *chain;
2496         u32 priority = ~0U;
2497
2498         read_lock_bh(&xfrm_policy_lock);
2499         chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2500         hlist_for_each_entry(pol, entry, chain, bydst) {
2501                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2502                     pol->type == type) {
2503                         ret = pol;
2504                         priority = ret->priority;
2505                         break;
2506                 }
2507         }
2508         chain = &xfrm_policy_inexact[dir];
2509         hlist_for_each_entry(pol, entry, chain, bydst) {
2510                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2511                     pol->type == type &&
2512                     pol->priority < priority) {
2513                         ret = pol;
2514                         break;
2515                 }
2516         }
2517
2518         if (ret)
2519                 xfrm_pol_hold(ret);
2520
2521         read_unlock_bh(&xfrm_policy_lock);
2522
2523         return ret;
2524 }
2525
2526 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2527 {
2528         int match = 0;
2529
2530         if (t->mode == m->mode && t->id.proto == m->proto &&
2531             (m->reqid == 0 || t->reqid == m->reqid)) {
2532                 switch (t->mode) {
2533                 case XFRM_MODE_TUNNEL:
2534                 case XFRM_MODE_BEET:
2535                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2536                                           m->old_family) == 0 &&
2537                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2538                                           m->old_family) == 0) {
2539                                 match = 1;
2540                         }
2541                         break;
2542                 case XFRM_MODE_TRANSPORT:
2543                         /* In transport mode the template does not store
2544                            any IP addresses, so we just compare the mode
2545                            and the protocol. */
2546                         match = 1;
2547                         break;
2548                 default:
2549                         break;
2550                 }
2551         }
2552         return match;
2553 }
2554
2555 /* update endpoint address(es) of template(s) */
2556 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2557                                struct xfrm_migrate *m, int num_migrate)
2558 {
2559         struct xfrm_migrate *mp;
2560         struct dst_entry *dst;
2561         int i, j, n = 0;
2562
2563         write_lock_bh(&pol->lock);
2564         if (unlikely(pol->dead)) {
2565                 /* target policy has been deleted */
2566                 write_unlock_bh(&pol->lock);
2567                 return -ENOENT;
2568         }
2569
2570         for (i = 0; i < pol->xfrm_nr; i++) {
2571                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2572                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2573                                 continue;
2574                         n++;
2575                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2576                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2577                                 continue;
2578                         /* update endpoints */
2579                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2580                                sizeof(pol->xfrm_vec[i].id.daddr));
2581                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2582                                sizeof(pol->xfrm_vec[i].saddr));
2583                         pol->xfrm_vec[i].encap_family = mp->new_family;
2584                         /* flush bundles */
2585                         while ((dst = pol->bundles) != NULL) {
2586                                 pol->bundles = dst->next;
2587                                 dst_free(dst);
2588                         }
2589                 }
2590         }
2591
2592         write_unlock_bh(&pol->lock);
2593
2594         if (!n)
2595                 return -ENODATA;
2596
2597         return 0;
2598 }
2599
2600 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2601 {
2602         int i, j;
2603
2604         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2605                 return -EINVAL;
2606
2607         for (i = 0; i < num_migrate; i++) {
2608                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2609                                    m[i].old_family) == 0) &&
2610                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2611                                    m[i].old_family) == 0))
2612                         return -EINVAL;
2613                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2614                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2615                         return -EINVAL;
2616
2617                 /* check if there is any duplicated entry */
2618                 for (j = i + 1; j < num_migrate; j++) {
2619                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2620                                     sizeof(m[i].old_daddr)) &&
2621                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2622                                     sizeof(m[i].old_saddr)) &&
2623                             m[i].proto == m[j].proto &&
2624                             m[i].mode == m[j].mode &&
2625                             m[i].reqid == m[j].reqid &&
2626                             m[i].old_family == m[j].old_family)
2627                                 return -EINVAL;
2628                 }
2629         }
2630
2631         return 0;
2632 }
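
/*
 * Illustrative example of what the check above enforces (addresses made
 * up): an entry that only rewrites the destination endpoint, e.g.
 * old_daddr 192.0.2.1 -> new_daddr 198.51.100.1 with the source address
 * left unchanged, passes.  An entry whose old and new address pairs are
 * both identical, or whose new address is the unspecified address, is
 * rejected with -EINVAL, as is a later entry repeating the same
 * (old_daddr, old_saddr, proto, mode, reqid, old_family) tuple.
 */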
2633
2634 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2635                  struct xfrm_migrate *m, int num_migrate)
2636 {
2637         int i, err, nx_cur = 0, nx_new = 0;
2638         struct xfrm_policy *pol = NULL;
2639         struct xfrm_state *x, *xc;
2640         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2641         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2642         struct xfrm_migrate *mp;
2643
2644         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2645                 goto out;
2646
2647         /* Stage 1 - find policy */
2648         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2649                 err = -ENOENT;
2650                 goto out;
2651         }
2652
2653         /* Stage 2 - find and update state(s) */
2654         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2655                 if ((x = xfrm_migrate_state_find(mp))) {
2656                         x_cur[nx_cur] = x;
2657                         nx_cur++;
2658                         if ((xc = xfrm_state_migrate(x, mp))) {
2659                                 x_new[nx_new] = xc;
2660                                 nx_new++;
2661                         } else {
2662                                 err = -ENODATA;
2663                                 goto restore_state;
2664                         }
2665                 }
2666         }
2667
2668         /* Stage 3 - update policy */
2669         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2670                 goto restore_state;
2671
2672         /* Stage 4 - delete old state(s) */
2673         if (nx_cur) {
2674                 xfrm_states_put(x_cur, nx_cur);
2675                 xfrm_states_delete(x_cur, nx_cur);
2676         }
2677
2678         /* Stage 5 - announce */
2679         km_migrate(sel, dir, type, m, num_migrate);
2680
2681         xfrm_pol_put(pol);
2682
2683         return 0;
2684 out:
2685         return err;
2686
2687 restore_state:
2688         if (pol)
2689                 xfrm_pol_put(pol);
2690         if (nx_cur)
2691                 xfrm_states_put(x_cur, nx_cur);
2692         if (nx_new)
2693                 xfrm_states_delete(x_new, nx_new);
2694
2695         return err;
2696 }
2697 EXPORT_SYMBOL(xfrm_migrate);
2698 #endif