/*
 * net/xfrm/xfrm_state.c
 * (Extracted from a gitweb page view of the mv-sheeva.git tree at the
 * commit "[XFRM]: Add generation count to xfrm_state and xfrm_dst".)
 */
/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
/* Netlink socket used by the xfrm netlink interface; exported for
 * use by other xfrm modules. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Sysctl-tunable default timer interval for async replay (aevent)
 * notifications; initialized from XFRM_AE_ETIME. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Sysctl-tunable replay sequence-number difference threshold for
 * aevent generation; initialized from XFRM_AE_SEQT_SIZE. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

/* Protects the state hash tables and the counters below. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;	/* bucket count - 1 */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;	/* resize ceiling */
static unsigned int xfrm_state_num;	/* states currently inserted */
static unsigned int xfrm_state_genid;	/* bumped on every insert */
57
58 static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
59 {
60         unsigned int h;
61         h = ntohl(addr->a4);
62         h = (h ^ (h>>16)) & hmask;
63         return h;
64 }
65
66 static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
67 {
68         unsigned int h;
69         h = ntohl(addr->a6[2]^addr->a6[3]);
70         h = (h ^ (h>>16)) & hmask;
71         return h;
72 }
73
/* Source-address hashing reuses the IPv4 destination hash. */
static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm4_dst_hash(addr, hmask);
}
78
/* Source-address hashing reuses the IPv6 destination hash. */
static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm6_dst_hash(addr, hmask);
}
83
84 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,  unsigned int hmask)
85 {
86         switch (family) {
87         case AF_INET:
88                 return __xfrm4_src_hash(addr, hmask);
89         case AF_INET6:
90                 return __xfrm6_src_hash(addr, hmask);
91         }
92         return 0;
93 }
94
95 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
96 {
97         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
98 }
99
100 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
101 {
102         switch (family) {
103         case AF_INET:
104                 return __xfrm4_dst_hash(addr, hmask);
105         case AF_INET6:
106                 return __xfrm6_dst_hash(addr, hmask);
107         }
108         return 0;
109 }
110
/* Destination hash against the current table size; callers hold
 * xfrm_state_lock so xfrm_state_hmask is stable. */
static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
}
115
116 static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
117                                         unsigned int hmask)
118 {
119         unsigned int h;
120         h = ntohl(addr->a4^spi^proto);
121         h = (h ^ (h>>10) ^ (h>>20)) & hmask;
122         return h;
123 }
124
125 static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
126                                             unsigned int hmask)
127 {
128         unsigned int h;
129         h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
130         h = (h ^ (h>>10) ^ (h>>20)) & hmask;
131         return h;
132 }
133
134 static inline
135 unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
136                          unsigned int hmask)
137 {
138         switch (family) {
139         case AF_INET:
140                 return __xfrm4_spi_hash(addr, spi, proto, hmask);
141         case AF_INET6:
142                 return __xfrm6_spi_hash(addr, spi, proto, hmask);
143         }
144         return 0;       /*XXX*/
145 }
146
/* SPI hash against the current table size; callers hold
 * xfrm_state_lock so xfrm_state_hmask is stable. */
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
        return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}
152
/* Allocate and zero a hash table of @sz bytes.  Small tables come from
 * kmalloc; larger ones from vmalloc (when hashdist requests it) or
 * directly from the page allocator.  Returns NULL on failure.  Must be
 * paired with xfrm_state_hash_free(), which picks the matching release
 * path from the same size. */
static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
{
        struct hlist_head *n;

        if (sz <= PAGE_SIZE)
                n = kmalloc(sz, GFP_KERNEL);
        else if (hashdist)
                n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
        else
                n = (struct hlist_head *)
                        __get_free_pages(GFP_KERNEL, get_order(sz));

        if (n)
                memset(n, 0, sz);

        return n;
}
170
/* Release a table from xfrm_state_hash_alloc().  @sz must be the size
 * it was allocated with, since it selects the free path (kfree /
 * vfree / free_pages) that matches the allocation path. */
static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
{
        if (sz <= PAGE_SIZE)
                kfree(n);
        else if (hashdist)
                vfree(n);
        else
                free_pages((unsigned long)n, get_order(sz));
}
180
/* Move every state on one old bydst chain into the new tables.  The
 * bydst chain is walked (safely, since hlist_add_head unlinks and
 * relinks each node) and each state is rehashed into all three new
 * tables with the new mask.  No references are taken or dropped: the
 * entries keep the references the old chains held.  Caller holds
 * xfrm_state_lock. */
static void xfrm_hash_transfer(struct hlist_head *list,
                               struct hlist_head *ndsttable,
                               struct hlist_head *nsrctable,
                               struct hlist_head *nspitable,
                               unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_state *x;

        hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
                unsigned int h;

                h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);

                h = __xfrm_src_hash(&x->props.saddr, x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);

                /* NOTE(review): states are moved onto the new byspi
                 * chain even when x->id.spi is 0 and they were never
                 * on the old byspi table -- confirm against
                 * __xfrm_state_insert()/__xfrm_state_delete(). */
                h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                    x->props.family, nhashmask);
                hlist_add_head(&x->byspi, nspitable+h);
        }
}
205
206 static unsigned long xfrm_hash_new_size(void)
207 {
208         return ((xfrm_state_hmask + 1) << 1) *
209                 sizeof(struct hlist_head);
210 }
211
/* Serializes concurrent resize work items against each other. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Workqueue handler that grows all three state hash tables to double
 * their current size.  The new tables are allocated outside
 * xfrm_state_lock; the lock is only held while entries are transferred
 * and the table pointers/mask are swapped, after which the old tables
 * are freed.  If any allocation fails the resize is simply abandoned. */
static void xfrm_hash_resize(void *__unused)
{
        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
        unsigned long nsize, osize;
        unsigned int nhashmask, ohashmask;
        int i;

        mutex_lock(&hash_resize_mutex);

        /* Allocate all three new tables up front; unwind on failure. */
        nsize = xfrm_hash_new_size();
        ndst = xfrm_state_hash_alloc(nsize);
        if (!ndst)
                goto out_unlock;
        nsrc = xfrm_state_hash_alloc(nsize);
        if (!nsrc) {
                xfrm_state_hash_free(ndst, nsize);
                goto out_unlock;
        }
        nspi = xfrm_state_hash_alloc(nsize);
        if (!nspi) {
                xfrm_state_hash_free(ndst, nsize);
                xfrm_state_hash_free(nsrc, nsize);
                goto out_unlock;
        }

        spin_lock_bh(&xfrm_state_lock);

        /* Rehash every bydst chain into the new tables. */
        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
        for (i = xfrm_state_hmask; i >= 0; i--)
                xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
                                   nhashmask);

        odst = xfrm_state_bydst;
        osrc = xfrm_state_bysrc;
        ospi = xfrm_state_byspi;
        ohashmask = xfrm_state_hmask;

        /* Publish the new tables atomically w.r.t. xfrm_state_lock. */
        xfrm_state_bydst = ndst;
        xfrm_state_bysrc = nsrc;
        xfrm_state_byspi = nspi;
        xfrm_state_hmask = nhashmask;

        spin_unlock_bh(&xfrm_state_lock);

        osize = (ohashmask + 1) * sizeof(struct hlist_head);
        xfrm_state_hash_free(odst, osize);
        xfrm_state_hash_free(osrc, osize);
        xfrm_state_hash_free(ospi, osize);

out_unlock:
        mutex_unlock(&hash_resize_mutex);
}
266
/* Deferred work item that performs the table resize above. */
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

/* Wait queue woken whenever state comes or goes, so key managers can
 * re-examine the tables. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Per-family xfrm_state operations, protected by the rwlock. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are parked on xfrm_state_gc_list
 * (reusing their bydst node) and freed by xfrm_state_gc_work. */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* When set, the GC task also flushes stale dst bundles. */
static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
288
/* Final teardown of a dead state: both timers must already be idle
 * (a still-pending timer here is a refcounting bug, hence BUG()),
 * then all per-state allocations, the mode/type modules and the
 * security context are released before the state itself is freed. */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
        if (del_timer(&x->timer))
                BUG();
        if (del_timer(&x->rtimer))
                BUG();
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
        kfree(x->encap);
        kfree(x->coaddr);
        if (x->mode)
                xfrm_put_mode(x->mode);
        if (x->type) {
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
        security_xfrm_state_free(x);
        kfree(x);
}
309
/* Workqueue handler for deferred state destruction.  Optionally
 * flushes dst bundles first (requested via the flag), then detaches
 * the whole pending GC list under the lock and destroys each entry
 * outside it.  Wakes km_waitq so waiters notice the states are gone. */
static void xfrm_state_gc_task(void *data)
{
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        if (xfrm_state_gc_flush_bundles) {
                xfrm_state_gc_flush_bundles = 0;
                xfrm_flush_bundles();
        }

        /* Steal the entire pending list in one shot. */
        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        /* Entries were queued via their bydst node in __xfrm_state_destroy. */
        hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
                xfrm_state_gc_destroy(x);

        wake_up(&km_waitq);
}
331
332 static inline unsigned long make_jiffies(long secs)
333 {
334         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
335                 return MAX_SCHEDULE_TIMEOUT-1;
336         else
337                 return secs*HZ;
338 }
339
/* Lifetime timer for a state.  Computes the time to the nearest
 * soft/hard add/use expiry, warns the key manager once on soft expiry
 * (km.dying latches the warning), deletes the state on hard expiry,
 * and otherwise re-arms itself.  The timer owns a reference on @x
 * which is dropped on the way out. */
static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;   /* seconds until the next event of interest */
        int warn = 0;           /* set when a soft limit has been hit */

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        /* Hard limits: past due means expire right now. */
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                /* use_time of 0 means "not used yet"; treat as now. */
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Soft expiry is only announced once. */
        if (x->km.dying)
                goto resched;
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0, 0);
resched:
        /* mod_timer() returning 0 means the timer was inactive, so the
         * newly-armed timer needs its own reference. */
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
                xfrm_state_hold(x);
        goto out;

expired:
        /* A larval (ACQ, no SPI) state is flipped to EXPIRED and kept
         * around briefly so a woken key manager can deal with it. */
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }
        if (!__xfrm_state_delete(x) && x->id.spi)
                km_state_expired(x, 1, 0);

out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);      /* drop the timer's reference */
}
410
static void xfrm_replay_timer_handler(unsigned long data);

/* Allocate and minimally initialize a new xfrm_state: refcount of 1,
 * unlinked hash nodes, both timers wired to their handlers, byte and
 * packet lifetimes defaulted to "infinite", and add_time stamped with
 * the current wall-clock second.  GFP_ATOMIC because callers may hold
 * spinlocks.  Returns NULL on allocation failure. */
struct xfrm_state *xfrm_state_alloc(void)
{
        struct xfrm_state *x;

        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

        if (x) {
                atomic_set(&x->refcnt, 1);
                atomic_set(&x->tunnel_users, 0);
                INIT_HLIST_NODE(&x->bydst);
                INIT_HLIST_NODE(&x->bysrc);
                INIT_HLIST_NODE(&x->byspi);
                init_timer(&x->timer);
                x->timer.function = xfrm_timer_handler;
                x->timer.data     = (unsigned long)x;
                init_timer(&x->rtimer);
                x->rtimer.function = xfrm_replay_timer_handler;
                x->rtimer.data     = (unsigned long)x;
                x->curlft.add_time = (unsigned long)xtime.tv_sec;
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
                x->lft.hard_packet_limit = XFRM_INF;
                x->replay_maxage = 0;
                x->replay_maxdiff = 0;
                spin_lock_init(&x->lock);
        }
        return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
443
/* Queue a dead state for deferred destruction.  The state must
 * already be XFRM_STATE_DEAD (and thus unhashed), so its bydst node
 * is free to be reused as the GC-list linkage.  Actual teardown
 * happens in xfrm_state_gc_task(). */
void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        hlist_add_head(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
454
/* Mark @x dead and unlink it from all hash tables and timers,
 * dropping the reference each of those held, plus the creation
 * reference from xfrm_state_alloc().  Returns 0 on success or -ESRCH
 * if the state was already dead.  Caller holds x->lock (see
 * xfrm_state_delete()) and its own reference on @x. */
int __xfrm_state_delete(struct xfrm_state *x)
{
        int err = -ESRCH;

        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                /* Each hash chain held one reference; drop as we unhash. */
                hlist_del(&x->bydst);
                __xfrm_state_put(x);
                hlist_del(&x->bysrc);
                __xfrm_state_put(x);
                if (x->id.spi) {
                        hlist_del(&x->byspi);
                        __xfrm_state_put(x);
                }
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);
                /* A pending timer also holds a reference. */
                if (del_timer(&x->timer))
                        __xfrm_state_put(x);
                if (del_timer(&x->rtimer))
                        __xfrm_state_put(x);

                /* The number two in this test is the reference
                 * mentioned in the comment below plus the reference
                 * our caller holds.  A larger value means that
                 * there are DSTs attached to this xfrm_state.
                 */
                if (atomic_read(&x->refcnt) > 2) {
                        xfrm_state_gc_flush_bundles = 1;
                        schedule_work(&xfrm_state_gc_work);
                }

                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
                 * is what we are dropping here.
                 */
                __xfrm_state_put(x);
                err = 0;
        }

        return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
498
/* Locked wrapper around __xfrm_state_delete(); takes x->lock for the
 * duration.  Returns 0 on success, -ESRCH if already dead. */
int xfrm_state_delete(struct xfrm_state *x)
{
        int err;

        spin_lock_bh(&x->lock);
        err = __xfrm_state_delete(x);
        spin_unlock_bh(&x->lock);

        return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
510
511 void xfrm_state_flush(u8 proto)
512 {
513         int i;
514
515         spin_lock_bh(&xfrm_state_lock);
516         for (i = 0; i < xfrm_state_hmask; i++) {
517                 struct hlist_node *entry;
518                 struct xfrm_state *x;
519 restart:
520                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
521                         if (!xfrm_state_kern(x) &&
522                             xfrm_id_proto_match(x->id.proto, proto)) {
523                                 xfrm_state_hold(x);
524                                 spin_unlock_bh(&xfrm_state_lock);
525
526                                 xfrm_state_delete(x);
527                                 xfrm_state_put(x);
528
529                                 spin_lock_bh(&xfrm_state_lock);
530                                 goto restart;
531                         }
532                 }
533         }
534         spin_unlock_bh(&xfrm_state_lock);
535         wake_up(&km_waitq);
536 }
537 EXPORT_SYMBOL(xfrm_state_flush);
538
/* Initialize the temporary selector of larval state @x from the flow
 * and template via the per-family afinfo hook.  Returns 0 on success
 * or -1 if the family has no registered afinfo. */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
                  struct xfrm_tmpl *tmpl,
                  xfrm_address_t *daddr, xfrm_address_t *saddr,
                  unsigned short family)
{
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -1;
        afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
        xfrm_state_put_afinfo(afinfo);
        return 0;
}
552
/* Look up a state by (daddr, SPI, proto) in the byspi table.  On a
 * match, a reference is taken and the state returned; NULL if no
 * match.  Caller holds xfrm_state_lock. */
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto)
                        continue;

                /* Family-specific destination address comparison. */
                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
584
/* Look up a state by (daddr, saddr, proto) in the bysrc table, used
 * when no SPI is available.  On a match, a reference is taken and the
 * state returned; NULL otherwise.  Caller holds xfrm_state_lock. */
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_src_hash(saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto)
                        continue;

                /* Family-specific src+dst address comparison. */
                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6) ||
                            !ipv6_addr_equal((struct in6_addr *)saddr,
                                             (struct in6_addr *)
                                             x->props.saddr.a6))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
619
/* Find a state matching @x: by SPI when @use_spi is set, otherwise by
 * source/destination address.  Returns a held reference or NULL.
 * Caller holds xfrm_state_lock. */
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
        if (use_spi)
                return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
                                           x->id.proto, family);
        else
                return __xfrm_state_lookup_byaddr(&x->id.daddr,
                                                  &x->props.saddr,
                                                  x->id.proto, family);
}
631
/* Find (or begin acquiring) a state usable for output along flow @fl
 * under template @tmpl and policy @pol.
 *
 * Scans the bydst bucket for a matching VALID state, preferring live
 * (non-dying) and then newest candidates.  If nothing is found and no
 * acquisition is already in flight, a larval XFRM_STATE_ACQ state is
 * created and the key manager queried via km_query().
 *
 * Returns a held state, or NULL with *err set: -EAGAIN while an
 * acquire is in progress, -ESRCH/-EEXIST/-ENOMEM on the failure paths
 * below. */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;
        
        spin_lock_bh(&xfrm_state_lock);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !security_xfrm_state_pol_flow_match(x, pol, fl))
                                        continue;
                                /* Prefer non-dying, then most recently added. */
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    security_xfrm_state_pol_flow_match(x, pol, fl))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        if (!x && !error && !acquire_in_progress) {
                /* A fully-specified SPI that already exists but did not
                 * match above means a conflicting state: bail out. */
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
                                              tmpl->id.proto, family)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
                if (error) {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        /* Hash the larval state in (one reference per
                         * chain) and arm the acquire-expiry timer. */
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                        xfrm_state_hold(x);
                        h = xfrm_src_hash(saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                                xfrm_state_hold(x);
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x)
                xfrm_state_hold(x);     /* reference returned to the caller */
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
744
/* Link @x into all three hash tables (one reference per chain), stamp
 * it with a fresh generation id, arm its timers, and kick a table
 * resize when the load factor warrants it.  Caller holds
 * xfrm_state_lock. */
static void __xfrm_state_insert(struct xfrm_state *x)
{
        unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);

        /* New genid invalidates cached bundles built on older states. */
        x->genid = ++xfrm_state_genid;

        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
        xfrm_state_hold(x);

        h = xfrm_src_hash(&x->props.saddr, x->props.family);

        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
        xfrm_state_hold(x);

        if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);

                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                xfrm_state_hold(x);
        }

        /* Fire the lifetime timer soon; it takes a ref when newly armed. */
        if (!mod_timer(&x->timer, jiffies + HZ))
                xfrm_state_hold(x);

        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                xfrm_state_hold(x);

        wake_up(&km_waitq);

        xfrm_state_num++;

        /* Grow the tables once chains start forming and we are below
         * the hashmax ceiling. */
        if (x->bydst.next != NULL &&
            (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
            xfrm_state_num > xfrm_state_hmask)
                schedule_work(&xfrm_hash_work);
}
783
/* Public insert: hashes @x in under xfrm_state_lock, then flushes all
 * cached bundles so traffic re-resolves against the new state. */
void xfrm_state_insert(struct xfrm_state *x)
{
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);

        xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
793
/* xfrm_state_lock is held */
/* Find an existing larval (XFRM_STATE_ACQ, SPI==0) state matching
 * (family, mode, reqid, daddr, saddr), returning a held reference.
 * If none exists and @create is set, allocate one with a host-exact
 * selector, hash it into bydst/bysrc, and arm its acquire-expiry
 * timer.  Returns NULL when not found and !create, or on allocation
 * failure. */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x;

        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
                    x->km.state     != XFRM_STATE_ACQ ||
                    x->id.spi       != 0)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4    != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
                                             (struct in6_addr *)daddr) ||
                            !ipv6_addr_equal((struct in6_addr *)
                                             x->props.saddr.a6,
                                             (struct in6_addr *)saddr))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        if (!create)
                return NULL;

        x = xfrm_state_alloc();
        if (likely(x)) {
                /* Selector pinned to exactly these two hosts. */
                switch (family) {
                case AF_INET:
                        x->sel.daddr.a4 = daddr->a4;
                        x->sel.saddr.a4 = saddr->a4;
                        x->sel.prefixlen_d = 32;
                        x->sel.prefixlen_s = 32;
                        x->props.saddr.a4 = saddr->a4;
                        x->id.daddr.a4 = daddr->a4;
                        break;

                case AF_INET6:
                        ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
                                       (struct in6_addr *)daddr);
                        ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
                                       (struct in6_addr *)saddr);
                        x->sel.prefixlen_d = 128;
                        x->sel.prefixlen_s = 128;
                        ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
                                       (struct in6_addr *)saddr);
                        ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
                                       (struct in6_addr *)daddr);
                        break;
                };

                x->km.state = XFRM_STATE_ACQ;
                x->id.proto = proto;
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
                x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                /* Timer and each hash chain take their own references. */
                xfrm_state_hold(x);
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
                xfrm_state_hold(x);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                h = xfrm_src_hash(saddr, family);
                xfrm_state_hold(x);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
        }

        return x;
}
877
878 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
879
/* Insert a fully-specified SA into the global state tables.
 *
 * Fails with -EEXIST if an identical SA is already installed.  If the
 * new SA resolves a pending larval (ACQUIRE) state, that larval entry
 * is looked up while xfrm_state_lock is held and deleted only after
 * the lock has been dropped.
 *
 * Returns 0 on success or a negative errno.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	/* Reject exact duplicates. */
	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Find the larval state this SA answers: first by acquire
	 * sequence number, then by reqid/mode/addresses. */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			/* Sequence matched but destination differs: not ours. */
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (!err)
		xfrm_flush_all_bundles();

	/* Kill the larval state outside the table lock; this also drops
	 * the reference taken by the lookup above. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
929
/* Update an existing SA with the parameters carried in @x.
 *
 * If the matching state is larval (XFRM_STATE_ACQ), @x replaces it via
 * __xfrm_state_insert() and the larval entry is deleted.  Otherwise
 * selected fields of the existing state (encap, care-of address,
 * selector, lifetimes) are overwritten under that state's own lock.
 *
 * Returns 0 on success, -ESRCH if no matching state exists, -EEXIST if
 * the match is kernel-owned, -EINVAL if it is no longer VALID.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval: insert @x as the real SA; x = NULL signals
		 * below that the old larval entry must be deleted. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* @x superseded the larval entry; drop the old one. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		/* Selector only matters for non-SPI (e.g. IPcomp-like) states. */
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the state timer; take a reference if it was idle. */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
994
/* Enforce byte/packet lifetime limits on @x.
 *
 * Stamps curlft.use_time on first use.  When a hard limit is reached
 * the state is marked EXPIRED and its timer fired immediately (taking
 * a reference if the timer was idle); when a soft limit is reached a
 * one-shot soft-expire notification is sent (guarded by km.dying).
 *
 * Returns 0 while the state is still usable, -EINVAL otherwise.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	/* Hard limits: expire right now via the state timer. */
	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	/* Soft limits: notify key managers once. */
	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
1020
1021 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1022 {
1023         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1024                 - skb_headroom(skb);
1025
1026         if (nhead > 0)
1027                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1028
1029         /* Check tail too... */
1030         return 0;
1031 }
1032
/* Validate @x for output use: enforce lifetimes, then make sure @skb
 * has room for the transform headers.  Returns 0 or a negative errno. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
1043
1044 struct xfrm_state *
1045 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1046                   unsigned short family)
1047 {
1048         struct xfrm_state *x;
1049
1050         spin_lock_bh(&xfrm_state_lock);
1051         x = __xfrm_state_lookup(daddr, spi, proto, family);
1052         spin_unlock_bh(&xfrm_state_lock);
1053         return x;
1054 }
1055 EXPORT_SYMBOL(xfrm_state_lookup);
1056
1057 struct xfrm_state *
1058 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1059                          u8 proto, unsigned short family)
1060 {
1061         struct xfrm_state *x;
1062
1063         spin_lock_bh(&xfrm_state_lock);
1064         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1065         spin_unlock_bh(&xfrm_state_lock);
1066         return x;
1067 }
1068 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1069
1070 struct xfrm_state *
1071 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1072               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1073               int create, unsigned short family)
1074 {
1075         struct xfrm_state *x;
1076
1077         spin_lock_bh(&xfrm_state_lock);
1078         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1079         spin_unlock_bh(&xfrm_state_lock);
1080
1081         return x;
1082 }
1083 EXPORT_SYMBOL(xfrm_find_acq);
1084
1085 #ifdef CONFIG_XFRM_SUB_POLICY
1086 int
1087 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1088                unsigned short family)
1089 {
1090         int err = 0;
1091         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1092         if (!afinfo)
1093                 return -EAFNOSUPPORT;
1094
1095         spin_lock_bh(&xfrm_state_lock);
1096         if (afinfo->tmpl_sort)
1097                 err = afinfo->tmpl_sort(dst, src, n);
1098         spin_unlock_bh(&xfrm_state_lock);
1099         xfrm_state_put_afinfo(afinfo);
1100         return err;
1101 }
1102 EXPORT_SYMBOL(xfrm_tmpl_sort);
1103
1104 int
1105 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1106                 unsigned short family)
1107 {
1108         int err = 0;
1109         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1110         if (!afinfo)
1111                 return -EAFNOSUPPORT;
1112
1113         spin_lock_bh(&xfrm_state_lock);
1114         if (afinfo->state_sort)
1115                 err = afinfo->state_sort(dst, src, n);
1116         spin_unlock_bh(&xfrm_state_lock);
1117         xfrm_state_put_afinfo(afinfo);
1118         return err;
1119 }
1120 EXPORT_SYMBOL(xfrm_state_sort);
1121 #endif
1122
/* Brute-force lookup of a larval state by acquire sequence number; no
 * per-seq index is maintained, so every bydst bucket is scanned. */
/* Scan all bydst buckets for a larval (XFRM_STATE_ACQ) state with the
 * given acquire sequence number.  Returns a referenced state or NULL.
 * Caller must hold xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
1143
1144 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1145 {
1146         struct xfrm_state *x;
1147
1148         spin_lock_bh(&xfrm_state_lock);
1149         x = __xfrm_find_acq_byseq(seq);
1150         spin_unlock_bh(&xfrm_state_lock);
1151         return x;
1152 }
1153 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1154
1155 u32 xfrm_get_acqseq(void)
1156 {
1157         u32 res;
1158         static u32 acqseq;
1159         static DEFINE_SPINLOCK(acqseq_lock);
1160
1161         spin_lock_bh(&acqseq_lock);
1162         res = (++acqseq ? : ++acqseq);
1163         spin_unlock_bh(&acqseq_lock);
1164         return res;
1165 }
1166 EXPORT_SYMBOL(xfrm_get_acqseq);
1167
/* Allocate an SPI for @x in [minspi, maxspi] (network byte order).
 *
 * If minspi == maxspi the single requested value is used when free;
 * otherwise up to range-size random probes are made.  On success the
 * state is hashed into the byspi table and km_waitq is woken.  If no
 * free SPI is found, x->id.spi stays 0 and the function returns
 * silently.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;	/* already has an SPI */

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI already in use. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		/* NOTE(review): random probing may test the same value
		 * repeatedly, so a free SPI can be missed even after
		 * range-size attempts. */
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* Publish the new SPI in the byspi hash. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1208
/* Invoke @func on every state whose protocol matches @proto.
 *
 * A first pass counts matches so that @func receives a decreasing
 * index (count-1 ... 0); the final invocation sees 0.  Returns -ENOENT
 * when nothing matches, otherwise the first non-zero return of @func,
 * or 0.  xfrm_state_lock is held across all callbacks, so @func must
 * not sleep or re-enter the state tables.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* Pass 1: count matching states. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* Pass 2: deliver each match with its countdown index. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1244
1245
/* Notify key managers of replay counter changes (XFRM_MSG_NEWAE). */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* Below the diff threshold: defer, unless a timer-driven
		 * notification was already pending (XFRM_TIME_DEFER). */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last notification: re-defer. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* Snapshot the counters we are about to announce. */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* Re-arm the aging timer; hold a reference if it was idle. */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);
1295
/* Timer callback for replay-notification aging (x->rtimer).
 *
 * While the state is VALID, either sends the deferred replay
 * notification (when async events are enabled) or records that one is
 * due via XFRM_TIME_DEFER.  Drops the reference taken when the timer
 * was armed.
 */
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
1312
1313 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1314 {
1315         u32 diff;
1316
1317         seq = ntohl(seq);
1318
1319         if (unlikely(seq == 0))
1320                 return -EINVAL;
1321
1322         if (likely(seq > x->replay.seq))
1323                 return 0;
1324
1325         diff = x->replay.seq - seq;
1326         if (diff >= x->props.replay_window) {
1327                 x->stats.replay_window++;
1328                 return -EINVAL;
1329         }
1330
1331         if (x->replay.bitmap & (1U << diff)) {
1332                 x->stats.replay++;
1333                 return -EINVAL;
1334         }
1335         return 0;
1336 }
1337 EXPORT_SYMBOL(xfrm_replay_check);
1338
1339 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1340 {
1341         u32 diff;
1342
1343         seq = ntohl(seq);
1344
1345         if (seq > x->replay.seq) {
1346                 diff = seq - x->replay.seq;
1347                 if (diff < x->props.replay_window)
1348                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1349                 else
1350                         x->replay.bitmap = 1;
1351                 x->replay.seq = seq;
1352         } else {
1353                 diff = x->replay.seq - seq;
1354                 x->replay.bitmap |= (1U << diff);
1355         }
1356
1357         if (xfrm_aevent_is_on())
1358                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1359 }
1360 EXPORT_SYMBOL(xfrm_replay_advance);
1361
1362 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1363 static DEFINE_RWLOCK(xfrm_km_lock);
1364
1365 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1366 {
1367         struct xfrm_mgr *km;
1368
1369         read_lock(&xfrm_km_lock);
1370         list_for_each_entry(km, &xfrm_km_list, list)
1371                 if (km->notify_policy)
1372                         km->notify_policy(xp, dir, c);
1373         read_unlock(&xfrm_km_lock);
1374 }
1375
1376 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1377 {
1378         struct xfrm_mgr *km;
1379         read_lock(&xfrm_km_lock);
1380         list_for_each_entry(km, &xfrm_km_list, list)
1381                 if (km->notify)
1382                         km->notify(x, c);
1383         read_unlock(&xfrm_km_lock);
1384 }
1385
1386 EXPORT_SYMBOL(km_policy_notify);
1387 EXPORT_SYMBOL(km_state_notify);
1388
1389 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1390 {
1391         struct km_event c;
1392
1393         c.data.hard = hard;
1394         c.pid = pid;
1395         c.event = XFRM_MSG_EXPIRE;
1396         km_state_notify(x, &c);
1397
1398         if (hard)
1399                 wake_up(&km_waitq);
1400 }
1401
1402 EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure;
 * we are happy with one success.
 */
1407 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1408 {
1409         int err = -EINVAL, acqret;
1410         struct xfrm_mgr *km;
1411
1412         read_lock(&xfrm_km_lock);
1413         list_for_each_entry(km, &xfrm_km_list, list) {
1414                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1415                 if (!acqret)
1416                         err = acqret;
1417         }
1418         read_unlock(&xfrm_km_lock);
1419         return err;
1420 }
1421 EXPORT_SYMBOL(km_query);
1422
1423 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1424 {
1425         int err = -EINVAL;
1426         struct xfrm_mgr *km;
1427
1428         read_lock(&xfrm_km_lock);
1429         list_for_each_entry(km, &xfrm_km_list, list) {
1430                 if (km->new_mapping)
1431                         err = km->new_mapping(x, ipaddr, sport);
1432                 if (!err)
1433                         break;
1434         }
1435         read_unlock(&xfrm_km_lock);
1436         return err;
1437 }
1438 EXPORT_SYMBOL(km_new_mapping);
1439
1440 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1441 {
1442         struct km_event c;
1443
1444         c.data.hard = hard;
1445         c.pid = pid;
1446         c.event = XFRM_MSG_POLEXPIRE;
1447         km_policy_notify(pol, dir, &c);
1448
1449         if (hard)
1450                 wake_up(&km_waitq);
1451 }
1452 EXPORT_SYMBOL(km_policy_expired);
1453
1454 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1455 {
1456         int err = -EINVAL;
1457         int ret;
1458         struct xfrm_mgr *km;
1459
1460         read_lock(&xfrm_km_lock);
1461         list_for_each_entry(km, &xfrm_km_list, list) {
1462                 if (km->report) {
1463                         ret = km->report(proto, sel, addr);
1464                         if (!ret)
1465                                 err = ret;
1466                 }
1467         }
1468         read_unlock(&xfrm_km_lock);
1469         return err;
1470 }
1471 EXPORT_SYMBOL(km_report);
1472
/* setsockopt() backend: hand a userspace policy blob to the registered
 * key managers until one compiles it, then install the resulting
 * per-socket policy on @sk.
 *
 * Returns 0 on success, -EMSGSIZE for an out-of-range length, -ENOMEM,
 * -EFAULT on copy failure, or the last error left by compile_policy().
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		/* First manager that understands the format wins; on
		 * success err holds the policy direction (see insert). */
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1512
1513 int xfrm_register_km(struct xfrm_mgr *km)
1514 {
1515         write_lock_bh(&xfrm_km_lock);
1516         list_add_tail(&km->list, &xfrm_km_list);
1517         write_unlock_bh(&xfrm_km_lock);
1518         return 0;
1519 }
1520 EXPORT_SYMBOL(xfrm_register_km);
1521
1522 int xfrm_unregister_km(struct xfrm_mgr *km)
1523 {
1524         write_lock_bh(&xfrm_km_lock);
1525         list_del(&km->list);
1526         write_unlock_bh(&xfrm_km_lock);
1527         return 0;
1528 }
1529 EXPORT_SYMBOL(xfrm_unregister_km);
1530
1531 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1532 {
1533         int err = 0;
1534         if (unlikely(afinfo == NULL))
1535                 return -EINVAL;
1536         if (unlikely(afinfo->family >= NPROTO))
1537                 return -EAFNOSUPPORT;
1538         write_lock_bh(&xfrm_state_afinfo_lock);
1539         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1540                 err = -ENOBUFS;
1541         else
1542                 xfrm_state_afinfo[afinfo->family] = afinfo;
1543         write_unlock_bh(&xfrm_state_afinfo_lock);
1544         return err;
1545 }
1546 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1547
1548 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1549 {
1550         int err = 0;
1551         if (unlikely(afinfo == NULL))
1552                 return -EINVAL;
1553         if (unlikely(afinfo->family >= NPROTO))
1554                 return -EAFNOSUPPORT;
1555         write_lock_bh(&xfrm_state_afinfo_lock);
1556         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1557                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1558                         err = -EINVAL;
1559                 else
1560                         xfrm_state_afinfo[afinfo->family] = NULL;
1561         }
1562         write_unlock_bh(&xfrm_state_afinfo_lock);
1563         return err;
1564 }
1565 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1566
/* Look up the per-family state ops for @family.
 *
 * On success the afinfo read lock is LEFT HELD and must be released
 * with xfrm_state_put_afinfo(); on failure (family out of range or
 * nothing registered) the lock is dropped and NULL is returned.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1578
/* Release the read lock left held by a successful xfrm_state_get_afinfo(). */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1583
1584 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach @x from its inner tunnel state and drop the reference.
 *
 * When the tunnel state's user count is exactly 2 it is deleted as
 * well.  NOTE(review): the "== 2" convention (creator plus this one
 * remaining user) is inferred from usage — confirm against the tunnel
 * users' accounting.
 */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1598
1599 /*
1600  * This function is NOT optimal.  For example, with ESP it will give an
1601  * MTU that's usually two bytes short of being optimal.  However, it will
1602  * usually give an answer that's a multiple of 4 provided the input is
1603  * also a multiple of 4.
1604  */
/* Compute the largest payload size usable over @x given link MTU @mtu.
 *
 * Iteratively shrinks a candidate until the transformed packet fits
 * within @mtu, never returning less than 68 bytes.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;	/* minimum MTU floor */

		/* Ask the transform for its maximum output size; fall back
		 * to a plain header_len addition otherwise. */
		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		res -= (m - mtu);	/* overshoot: shrink and retry */
	}

	return res;
}
1632
1633 int xfrm_init_state(struct xfrm_state *x)
1634 {
1635         struct xfrm_state_afinfo *afinfo;
1636         int family = x->props.family;
1637         int err;
1638
1639         err = -EAFNOSUPPORT;
1640         afinfo = xfrm_state_get_afinfo(family);
1641         if (!afinfo)
1642                 goto error;
1643
1644         err = 0;
1645         if (afinfo->init_flags)
1646                 err = afinfo->init_flags(x);
1647
1648         xfrm_state_put_afinfo(afinfo);
1649
1650         if (err)
1651                 goto error;
1652
1653         err = -EPROTONOSUPPORT;
1654         x->type = xfrm_get_type(x->id.proto, family);
1655         if (x->type == NULL)
1656                 goto error;
1657
1658         err = x->type->init_state(x);
1659         if (err)
1660                 goto error;
1661
1662         x->mode = xfrm_get_mode(x->props.mode, family);
1663         if (x->mode == NULL)
1664                 goto error;
1665
1666         x->km.state = XFRM_STATE_VALID;
1667
1668 error:
1669         return err;
1670 }
1671
1672 EXPORT_SYMBOL(xfrm_init_state);
1673  
/* Boot-time initialization of the xfrm state subsystem: allocate the
 * three 8-bucket hash tables (bydst, bysrc, byspi) and set up the
 * garbage-collection work item.  Panics on allocation failure since
 * XFRM cannot operate without its tables.
 */
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
1689