/* net/xfrm/xfrm_state.c — from commit "[XFRM] Optimize SA dumping" */
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23
24 #include "xfrm_hash.h"
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59                                          xfrm_address_t *saddr,
60                                          u32 reqid,
61                                          unsigned short family)
62 {
63         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64 }
65
66 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67                                          xfrm_address_t *saddr,
68                                          unsigned short family)
69 {
70         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
71 }
72
73 static inline unsigned int
74 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
77 }
78
/* Re-bucket every state on @list into the three new tables, whose size
 * is described by @nhashmask.  The walk iterates via x->bydst, and
 * hlist_add_head() rewrites that very node's links when moving it to
 * the new table — hence the _safe iterator, which caches the next
 * pointer before each entry is relinked.
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Larval (acquire) states have no SPI and thus no byspi
		 * linkage to transfer. */
		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
109
110 static unsigned long xfrm_hash_new_size(void)
111 {
112         return ((xfrm_state_hmask + 1) << 1) *
113                 sizeof(struct hlist_head);
114 }
115
116 static DEFINE_MUTEX(hash_resize_mutex);
117
118 static void xfrm_hash_resize(struct work_struct *__unused)
119 {
120         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
121         unsigned long nsize, osize;
122         unsigned int nhashmask, ohashmask;
123         int i;
124
125         mutex_lock(&hash_resize_mutex);
126
127         nsize = xfrm_hash_new_size();
128         ndst = xfrm_hash_alloc(nsize);
129         if (!ndst)
130                 goto out_unlock;
131         nsrc = xfrm_hash_alloc(nsize);
132         if (!nsrc) {
133                 xfrm_hash_free(ndst, nsize);
134                 goto out_unlock;
135         }
136         nspi = xfrm_hash_alloc(nsize);
137         if (!nspi) {
138                 xfrm_hash_free(ndst, nsize);
139                 xfrm_hash_free(nsrc, nsize);
140                 goto out_unlock;
141         }
142
143         spin_lock_bh(&xfrm_state_lock);
144
145         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
146         for (i = xfrm_state_hmask; i >= 0; i--)
147                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
148                                    nhashmask);
149
150         odst = xfrm_state_bydst;
151         osrc = xfrm_state_bysrc;
152         ospi = xfrm_state_byspi;
153         ohashmask = xfrm_state_hmask;
154
155         xfrm_state_bydst = ndst;
156         xfrm_state_bysrc = nsrc;
157         xfrm_state_byspi = nspi;
158         xfrm_state_hmask = nhashmask;
159
160         spin_unlock_bh(&xfrm_state_lock);
161
162         osize = (ohashmask + 1) * sizeof(struct hlist_head);
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166
167 out_unlock:
168         mutex_unlock(&hash_resize_mutex);
169 }
170
171 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
172
173 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
174 EXPORT_SYMBOL(km_waitq);
175
176 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
177 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
178
179 static struct work_struct xfrm_state_gc_work;
180 static HLIST_HEAD(xfrm_state_gc_list);
181 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
182
183 int __xfrm_state_delete(struct xfrm_state *x);
184
185 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
186 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
187
188 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
/* Final teardown of a dead state.  Runs from the GC work item so the
 * synchronous timer shutdown may sleep; frees every optional
 * sub-object, lets the type/security hooks release their resources,
 * then frees the state itself.
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	/* Make sure neither timer handler is still executing. */
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
209
/* GC work item: steal the whole pending list under the lock, then
 * destroy each state without any lock held (destruction sleeps in
 * del_timer_sync()).
 */
static void xfrm_state_gc_task(struct work_struct *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	/* Splice out the current list; new victims go onto a fresh head. */
	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	/* Let sleepers (e.g. anyone waiting on km_waitq) re-check. */
	wake_up(&km_waitq);
}
226
227 static inline unsigned long make_jiffies(long secs)
228 {
229         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230                 return MAX_SCHEDULE_TIMEOUT-1;
231         else
232                 return secs*HZ;
233 }
234
/* Per-state lifetime timer.
 *
 * Evaluates hard and soft add/use expiry deadlines against wall-clock
 * seconds: a reached hard limit deletes the state (with a hard-expire
 * notification to key managers), a reached soft limit sends a single
 * warning (km.dying suppresses repeats).  The timer is re-armed for
 * the nearest remaining deadline.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the closest deadline */
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time == 0 means "never used yet"; count from now. */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already reported once; just keep rescheduling. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);	/* soft-expire notification */
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		/* Larval acquire state: flag it and poll again shortly. */
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1, 0);	/* hard-expire notification */

out:
	spin_unlock(&x->lock);
}
304
305 static void xfrm_replay_timer_handler(unsigned long data);
306
307 struct xfrm_state *xfrm_state_alloc(void)
308 {
309         struct xfrm_state *x;
310
311         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
312
313         if (x) {
314                 atomic_set(&x->refcnt, 1);
315                 atomic_set(&x->tunnel_users, 0);
316                 INIT_HLIST_NODE(&x->bydst);
317                 INIT_HLIST_NODE(&x->bysrc);
318                 INIT_HLIST_NODE(&x->byspi);
319                 init_timer(&x->timer);
320                 x->timer.function = xfrm_timer_handler;
321                 x->timer.data     = (unsigned long)x;
322                 init_timer(&x->rtimer);
323                 x->rtimer.function = xfrm_replay_timer_handler;
324                 x->rtimer.data     = (unsigned long)x;
325                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
326                 x->lft.soft_byte_limit = XFRM_INF;
327                 x->lft.soft_packet_limit = XFRM_INF;
328                 x->lft.hard_byte_limit = XFRM_INF;
329                 x->lft.hard_packet_limit = XFRM_INF;
330                 x->replay_maxage = 0;
331                 x->replay_maxdiff = 0;
332                 spin_lock_init(&x->lock);
333         }
334         return x;
335 }
336 EXPORT_SYMBOL(xfrm_state_alloc);
337
/* Queue a dead state for deferred destruction.  The bydst node is
 * reused to link it onto the GC list (the state is already unhashed);
 * the actual freeing happens in xfrm_state_gc_task() where sleeping
 * is allowed.
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
348
349 int __xfrm_state_delete(struct xfrm_state *x)
350 {
351         int err = -ESRCH;
352
353         if (x->km.state != XFRM_STATE_DEAD) {
354                 x->km.state = XFRM_STATE_DEAD;
355                 spin_lock(&xfrm_state_lock);
356                 hlist_del(&x->bydst);
357                 hlist_del(&x->bysrc);
358                 if (x->id.spi)
359                         hlist_del(&x->byspi);
360                 xfrm_state_num--;
361                 spin_unlock(&xfrm_state_lock);
362
363                 /* All xfrm_state objects are created by xfrm_state_alloc.
364                  * The xfrm_state_alloc call gives a reference, and that
365                  * is what we are dropping here.
366                  */
367                 __xfrm_state_put(x);
368                 err = 0;
369         }
370
371         return err;
372 }
373 EXPORT_SYMBOL(__xfrm_state_delete);
374
375 int xfrm_state_delete(struct xfrm_state *x)
376 {
377         int err;
378
379         spin_lock_bh(&x->lock);
380         err = __xfrm_state_delete(x);
381         spin_unlock_bh(&x->lock);
382
383         return err;
384 }
385 EXPORT_SYMBOL(xfrm_state_delete);
386
/* Delete every non-kernel-owned state whose protocol matches @proto.
 *
 * xfrm_state_delete() takes x->lock and may notify key managers, so
 * xfrm_state_lock must be dropped around it.  Because the chain may
 * change while unlocked, the walk restarts from the head of the
 * bucket after each deletion.
 */
void xfrm_state_flush(u8 proto)
{
	int i;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				/* Hold a reference so the state survives
				 * the unlocked window. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
414
415 static int
416 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
417                   struct xfrm_tmpl *tmpl,
418                   xfrm_address_t *daddr, xfrm_address_t *saddr,
419                   unsigned short family)
420 {
421         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
422         if (!afinfo)
423                 return -1;
424         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
425         xfrm_state_put_afinfo(afinfo);
426         return 0;
427 }
428
429 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
430 {
431         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
432         struct xfrm_state *x;
433         struct hlist_node *entry;
434
435         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
436                 if (x->props.family != family ||
437                     x->id.spi       != spi ||
438                     x->id.proto     != proto)
439                         continue;
440
441                 switch (family) {
442                 case AF_INET:
443                         if (x->id.daddr.a4 != daddr->a4)
444                                 continue;
445                         break;
446                 case AF_INET6:
447                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
448                                              (struct in6_addr *)
449                                              x->id.daddr.a6))
450                                 continue;
451                         break;
452                 };
453
454                 xfrm_state_hold(x);
455                 return x;
456         }
457
458         return NULL;
459 }
460
461 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
462 {
463         unsigned int h = xfrm_src_hash(daddr, saddr, family);
464         struct xfrm_state *x;
465         struct hlist_node *entry;
466
467         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
468                 if (x->props.family != family ||
469                     x->id.proto     != proto)
470                         continue;
471
472                 switch (family) {
473                 case AF_INET:
474                         if (x->id.daddr.a4 != daddr->a4 ||
475                             x->props.saddr.a4 != saddr->a4)
476                                 continue;
477                         break;
478                 case AF_INET6:
479                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
480                                              (struct in6_addr *)
481                                              x->id.daddr.a6) ||
482                             !ipv6_addr_equal((struct in6_addr *)saddr,
483                                              (struct in6_addr *)
484                                              x->props.saddr.a6))
485                                 continue;
486                         break;
487                 };
488
489                 xfrm_state_hold(x);
490                 return x;
491         }
492
493         return NULL;
494 }
495
496 static inline struct xfrm_state *
497 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
498 {
499         if (use_spi)
500                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
501                                            x->id.proto, family);
502         else
503                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
504                                                   &x->props.saddr,
505                                                   x->id.proto, family);
506 }
507
508 static void xfrm_hash_grow_check(int have_hash_collision)
509 {
510         if (have_hash_collision &&
511             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
512             xfrm_state_num > xfrm_state_hmask)
513                 schedule_work(&xfrm_hash_work);
514 }
515
/* Find (or begin acquiring) a state usable for output matching the
 * given addresses, flow, template and policy.
 *
 * Among valid candidates the "best" is the one least close to dying
 * and, on a tie, the most recently added.  If nothing matches and no
 * acquisition is already in flight, a larval ACQ state is created and
 * key managers are queried to resolve it.  On success a referenced
 * state is returned; otherwise NULL with *err set (-EAGAIN while an
 * acquire is pending).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer states farther from expiry, then
				 * newer ones. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* A state with the template's exact SPI already exists
		 * but did not match above: refuse to create another. */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			/* Hash the larval state so later lookups find the
			 * in-flight acquire; expire it if unresolved. */
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
626
/* Link @x into all three hash tables, stamp it with a fresh
 * generation id and arm its timers.  Caller must hold
 * xfrm_state_lock.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	/* Larval states without an SPI are not hashed by SPI. */
	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	/* Fire the lifetime timer soon to compute real expiry deadlines. */
	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	/* A non-NULL next means the bucket already had an entry. */
	xfrm_hash_grow_check(x->bydst.next != NULL);
}
657
658 /* xfrm_state_lock is held */
659 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
660 {
661         unsigned short family = xnew->props.family;
662         u32 reqid = xnew->props.reqid;
663         struct xfrm_state *x;
664         struct hlist_node *entry;
665         unsigned int h;
666
667         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
668         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
669                 if (x->props.family     == family &&
670                     x->props.reqid      == reqid &&
671                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
672                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
673                         x->genid = xfrm_state_genid;
674         }
675 }
676
/* Public insert: under the table lock, refresh genids of states that
 * share @x's key tuple, then hash @x in.
 */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
685
686 /* xfrm_state_lock is held */
/* xfrm_state_lock is held */
/* Find an existing larval (ACQ, SPI-less) state for the given key
 * tuple, or — when @create is set — allocate and hash a new one with
 * a host-only temporary selector and an acquire-expiry timer.
 * Returns a referenced state, or NULL.
 */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		/* Only SPI-less acquire states qualify. */
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		/* Temporary selector covers exactly the two hosts
		 * (full-length prefixes). */
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		};

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		/* Extra reference for the caller; the alloc reference
		 * belongs to the hash tables. */
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(daddr, saddr, family);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);

		xfrm_state_num++;

		xfrm_hash_grow_check(x->bydst.next != NULL);
	}

	return x;
}
771
772 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
773
/* Add a fully-specified state.
 *
 * Fails with -EEXIST if an equivalent state is already hashed.
 * Otherwise any matching larval (acquire) state — found by sequence
 * number or by key tuple — is replaced: @x is inserted under the lock
 * and the larval x1 is deleted after the lock is dropped.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		/* Ignore a by-seq match whose destination differs. */
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* Delete the superseded larval state outside the table lock
	 * (xfrm_state_delete takes x1->lock). */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
821
/* Update an existing state with the attributes carried by @x.
 *
 * If the located state x1 is a larval acquire, @x simply replaces it
 * (insert @x, delete x1).  If x1 is valid, selected fields (encap,
 * care-of address, selector, lifetimes) are copied into it under its
 * lock.  Returns -ESRCH if no matching state exists, -EEXIST for
 * kernel-owned states, -EINVAL if x1 is in no updatable state.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Replace the larval state; x = NULL marks this path. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* Acquire-replacement path: drop the old larval state. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-evaluate expiry against the new lifetimes soon. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
885
/* Record first-use time and enforce byte/packet lifetime limits.
 *
 * Returns -EINVAL when the state is not VALID or a hard limit has been
 * reached (the state is marked EXPIRED and its timer fired at once);
 * crossing a soft limit marks the state dying and sends a soft-expire
 * event to the key managers.  Returns 0 otherwise.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
        if (!x->curlft.use_time)
                x->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (x->km.state != XFRM_STATE_VALID)
                return -EINVAL;

        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
                x->km.state = XFRM_STATE_EXPIRED;
                /* Fire the state timer immediately to reap the SA. */
                mod_timer(&x->timer, jiffies);
                return -EINVAL;
        }

        if (!x->km.dying &&
            (x->curlft.bytes >= x->lft.soft_byte_limit ||
             x->curlft.packets >= x->lft.soft_packet_limit)) {
                x->km.dying = 1;
                /* Soft expiry: hard == 0, pid == 0 (broadcast). */
                km_state_expired(x, 0, 0);
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
910
911 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
912 {
913         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
914                 - skb_headroom(skb);
915
916         if (nhead > 0)
917                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
918
919         /* Check tail too... */
920         return 0;
921 }
922
/* Combined per-packet state check: lifetime accounting first, then
 * headroom.  Returns 0 on success or the first failing step's error. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int err = xfrm_state_check_expire(x);

        if (err < 0)
                return err;

        return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
933
/* Find an SA by (daddr, spi, proto); locked wrapper around the
 * unlocked hash lookup.  Returns NULL if not found. */
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
                  unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup(daddr, spi, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
946
/* Find an SA by (daddr, saddr, proto); locked wrapper around the
 * unlocked by-address lookup.  Returns NULL if not found. */
struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
                         u8 proto, unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
959
960 struct xfrm_state *
961 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
962               xfrm_address_t *daddr, xfrm_address_t *saddr, 
963               int create, unsigned short family)
964 {
965         struct xfrm_state *x;
966
967         spin_lock_bh(&xfrm_state_lock);
968         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
969         spin_unlock_bh(&xfrm_state_lock);
970
971         return x;
972 }
973 EXPORT_SYMBOL(xfrm_find_acq);
974
975 #ifdef CONFIG_XFRM_SUB_POLICY
976 int
977 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
978                unsigned short family)
979 {
980         int err = 0;
981         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
982         if (!afinfo)
983                 return -EAFNOSUPPORT;
984
985         spin_lock_bh(&xfrm_state_lock);
986         if (afinfo->tmpl_sort)
987                 err = afinfo->tmpl_sort(dst, src, n);
988         spin_unlock_bh(&xfrm_state_lock);
989         xfrm_state_put_afinfo(afinfo);
990         return err;
991 }
992 EXPORT_SYMBOL(xfrm_tmpl_sort);
993
994 int
995 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
996                 unsigned short family)
997 {
998         int err = 0;
999         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1000         if (!afinfo)
1001                 return -EAFNOSUPPORT;
1002
1003         spin_lock_bh(&xfrm_state_lock);
1004         if (afinfo->state_sort)
1005                 err = afinfo->state_sort(dst, src, n);
1006         spin_unlock_bh(&xfrm_state_lock);
1007         xfrm_state_put_afinfo(afinfo);
1008         return err;
1009 }
1010 EXPORT_SYMBOL(xfrm_state_sort);
1011 #endif
1012
/* Silly enough, but I'm lazy to build resolution list */

/* Linear scan of the entire bydst hash for an ACQUIRE state carrying
 * the given km sequence number.  Returns it with a reference held, or
 * NULL.  Caller holds xfrm_state_lock. */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
        int i;

        for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;

                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (x->km.seq == seq &&
                            x->km.state == XFRM_STATE_ACQ) {
                                xfrm_state_hold(x);
                                return x;
                        }
                }
        }
        return NULL;
}
1033
/* Locked wrapper around __xfrm_find_acq_byseq(); returns a held
 * reference or NULL. */
struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_find_acq_byseq(seq);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
1044
1045 u32 xfrm_get_acqseq(void)
1046 {
1047         u32 res;
1048         static u32 acqseq;
1049         static DEFINE_SPINLOCK(acqseq_lock);
1050
1051         spin_lock_bh(&acqseq_lock);
1052         res = (++acqseq ? : ++acqseq);
1053         spin_unlock_bh(&acqseq_lock);
1054         return res;
1055 }
1056 EXPORT_SYMBOL(xfrm_get_acqseq);
1057
/* Pick an SPI for @x and, on success, hash the state into the byspi
 * table and wake km_waitq.
 *
 * minspi == maxspi requests that exact value.  Otherwise up to
 * (high - low + 1) random probes are made within [low, high].  If no
 * free SPI is found, x->id.spi stays 0 and nothing is hashed.  A state
 * that already has an SPI is left untouched.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
        unsigned int h;
        struct xfrm_state *x0;

        if (x->id.spi)
                return;

        if (minspi == maxspi) {
                x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
                if (x0) {
                        /* Requested SPI already in use. */
                        xfrm_state_put(x0);
                        return;
                }
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
                u32 low = ntohl(minspi);
                u32 high = ntohl(maxspi);
                for (h=0; h<high-low+1; h++) {
                        spi = low + net_random()%(high-low+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
                                break;
                        }
                        xfrm_state_put(x0);
                }
        }
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                spin_unlock_bh(&xfrm_state_lock);
                wake_up(&km_waitq);
        }
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1097
/* Invoke @func for every state whose protocol matches @proto, under
 * xfrm_state_lock.
 *
 * The callback for an entry is deferred until the next match is found,
 * so the final entry is delivered with count == 0 — letting callers
 * (e.g. dump code) recognize the last element; earlier entries see the
 * 1-based running count.  Returns -ENOENT when nothing matched,
 * otherwise the first non-zero callback result, or 0.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
                    void *data)
{
        int i;
        struct xfrm_state *x, *last = NULL;
        struct hlist_node *entry;
        int count = 0;
        int err = 0;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_id_proto_match(x->id.proto, proto))
                                continue;
                        if (last) {
                                err = func(last, count, data);
                                if (err)
                                        goto out;
                        }
                        last = x;
                        count++;
                }
        }
        if (count == 0) {
                err = -ENOENT;
                goto out;
        }
        /* count == 0 flags the final entry to the callback. */
        err = func(last, 0, data);
out:
        spin_unlock_bh(&xfrm_state_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1131
1132
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
        struct km_event c;
        /* we send notify messages in case
         *  1. we updated on of the sequence numbers, and the seqno difference
         *     is at least x->replay_maxdiff, in this case we also update the
         *     timeout of our timer function
         *  2. if x->replay_maxage has elapsed since last update,
         *     and there were changes
         *
         *  The state structure must be locked!
         */

        switch (event) {
        case XFRM_REPLAY_UPDATE:
                /* Change still below the maxdiff threshold: suppress,
                 * unless a timer-deferred notification is pending. */
                if (x->replay_maxdiff &&
                    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
                    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
                        if (x->xflags & XFRM_TIME_DEFER)
                                event = XFRM_REPLAY_TIMEOUT;
                        else
                                return;
                }

                break;

        case XFRM_REPLAY_TIMEOUT:
                /* Nothing changed since the last notification: defer
                 * until the next replay update. */
                if ((x->replay.seq == x->preplay.seq) &&
                    (x->replay.bitmap == x->preplay.bitmap) &&
                    (x->replay.oseq == x->preplay.oseq)) {
                        x->xflags |= XFRM_TIME_DEFER;
                        return;
                }

                break;
        }

        /* Snapshot the replay state and broadcast it to the managers. */
        memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
        c.event = XFRM_MSG_NEWAE;
        c.data.aevent = event;
        km_state_notify(x, &c);

        /* Re-arm the aging timer; clear the defer flag only when the
         * timer was not already pending. */
        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
1180
1181 static void xfrm_replay_timer_handler(unsigned long data)
1182 {
1183         struct xfrm_state *x = (struct xfrm_state*)data;
1184
1185         spin_lock(&x->lock);
1186
1187         if (x->km.state == XFRM_STATE_VALID) {
1188                 if (xfrm_aevent_is_on())
1189                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1190                 else
1191                         x->xflags |= XFRM_TIME_DEFER;
1192         }
1193
1194         spin_unlock(&x->lock);
1195 }
1196
1197 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1198 {
1199         u32 diff;
1200         u32 seq = ntohl(net_seq);
1201
1202         if (unlikely(seq == 0))
1203                 return -EINVAL;
1204
1205         if (likely(seq > x->replay.seq))
1206                 return 0;
1207
1208         diff = x->replay.seq - seq;
1209         if (diff >= x->props.replay_window) {
1210                 x->stats.replay_window++;
1211                 return -EINVAL;
1212         }
1213
1214         if (x->replay.bitmap & (1U << diff)) {
1215                 x->stats.replay++;
1216                 return -EINVAL;
1217         }
1218         return 0;
1219 }
1220 EXPORT_SYMBOL(xfrm_replay_check);
1221
/* Record @net_seq as seen: slide the replay window forward for a new
 * high sequence number, or set the bit for an in-window older one.
 * Notifies the key managers when async events are enabled. */
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
        u32 diff;
        u32 seq = ntohl(net_seq);

        if (seq > x->replay.seq) {
                diff = seq - x->replay.seq;
                if (diff < x->props.replay_window)
                        /* Shift window forward; bit 0 marks @seq itself. */
                        x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
                else
                        /* Jumped past the whole window: only @seq seen. */
                        x->replay.bitmap = 1;
                x->replay.seq = seq;
        } else {
                diff = x->replay.seq - seq;
                x->replay.bitmap |= (1U << diff);
        }

        if (xfrm_aevent_is_on())
                xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
1243
/* Registered key managers and the rwlock protecting the list. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1246
1247 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1248 {
1249         struct xfrm_mgr *km;
1250
1251         read_lock(&xfrm_km_lock);
1252         list_for_each_entry(km, &xfrm_km_list, list)
1253                 if (km->notify_policy)
1254                         km->notify_policy(xp, dir, c);
1255         read_unlock(&xfrm_km_lock);
1256 }
1257
1258 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1259 {
1260         struct xfrm_mgr *km;
1261         read_lock(&xfrm_km_lock);
1262         list_for_each_entry(km, &xfrm_km_list, list)
1263                 if (km->notify)
1264                         km->notify(x, c);
1265         read_unlock(&xfrm_km_lock);
1266 }
1267
1268 EXPORT_SYMBOL(km_policy_notify);
1269 EXPORT_SYMBOL(km_state_notify);
1270
1271 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1272 {
1273         struct km_event c;
1274
1275         c.data.hard = hard;
1276         c.pid = pid;
1277         c.event = XFRM_MSG_EXPIRE;
1278         km_state_notify(x, &c);
1279
1280         if (hard)
1281                 wake_up(&km_waitq);
1282 }
1283
1284 EXPORT_SYMBOL(km_state_expired);
1285 /*
1286  * We send to all registered managers regardless of failure
1287  * We are happy with one success
1288 */
1289 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1290 {
1291         int err = -EINVAL, acqret;
1292         struct xfrm_mgr *km;
1293
1294         read_lock(&xfrm_km_lock);
1295         list_for_each_entry(km, &xfrm_km_list, list) {
1296                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1297                 if (!acqret)
1298                         err = acqret;
1299         }
1300         read_unlock(&xfrm_km_lock);
1301         return err;
1302 }
1303 EXPORT_SYMBOL(km_query);
1304
1305 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1306 {
1307         int err = -EINVAL;
1308         struct xfrm_mgr *km;
1309
1310         read_lock(&xfrm_km_lock);
1311         list_for_each_entry(km, &xfrm_km_list, list) {
1312                 if (km->new_mapping)
1313                         err = km->new_mapping(x, ipaddr, sport);
1314                 if (!err)
1315                         break;
1316         }
1317         read_unlock(&xfrm_km_lock);
1318         return err;
1319 }
1320 EXPORT_SYMBOL(km_new_mapping);
1321
1322 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1323 {
1324         struct km_event c;
1325
1326         c.data.hard = hard;
1327         c.pid = pid;
1328         c.event = XFRM_MSG_POLEXPIRE;
1329         km_policy_notify(pol, dir, &c);
1330
1331         if (hard)
1332                 wake_up(&km_waitq);
1333 }
1334 EXPORT_SYMBOL(km_policy_expired);
1335
1336 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1337 {
1338         int err = -EINVAL;
1339         int ret;
1340         struct xfrm_mgr *km;
1341
1342         read_lock(&xfrm_km_lock);
1343         list_for_each_entry(km, &xfrm_km_list, list) {
1344                 if (km->report) {
1345                         ret = km->report(proto, sel, addr);
1346                         if (!ret)
1347                                 err = ret;
1348                 }
1349         }
1350         read_unlock(&xfrm_km_lock);
1351         return err;
1352 }
1353 EXPORT_SYMBOL(km_report);
1354
/* setsockopt() helper: copy a policy blob from userspace and let the
 * key managers compile it into a per-socket policy.
 *
 * Each manager's compile_policy() is tried in turn until one succeeds;
 * on success err is set >= 0 and is then used as the direction for
 * xfrm_sk_policy_insert().  Returns 0 or a negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
        int err;
        u8 *data;
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;

        if (optlen <= 0 || optlen > PAGE_SIZE)
                return -EMSGSIZE;

        data = kmalloc(optlen, GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        err = -EFAULT;
        if (copy_from_user(data, optval, optlen))
                goto out;

        err = -EINVAL;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                pol = km->compile_policy(sk, optname, data,
                                         optlen, &err);
                if (err >= 0)
                        break;
        }
        read_unlock(&xfrm_km_lock);

        if (err >= 0) {
                /* err holds the policy direction here. */
                xfrm_sk_policy_insert(sk, err, pol);
                xfrm_pol_put(pol);
                err = 0;
        }

out:
        kfree(data);
        return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1394
1395 int xfrm_register_km(struct xfrm_mgr *km)
1396 {
1397         write_lock_bh(&xfrm_km_lock);
1398         list_add_tail(&km->list, &xfrm_km_list);
1399         write_unlock_bh(&xfrm_km_lock);
1400         return 0;
1401 }
1402 EXPORT_SYMBOL(xfrm_register_km);
1403
1404 int xfrm_unregister_km(struct xfrm_mgr *km)
1405 {
1406         write_lock_bh(&xfrm_km_lock);
1407         list_del(&km->list);
1408         write_unlock_bh(&xfrm_km_lock);
1409         return 0;
1410 }
1411 EXPORT_SYMBOL(xfrm_unregister_km);
1412
1413 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1414 {
1415         int err = 0;
1416         if (unlikely(afinfo == NULL))
1417                 return -EINVAL;
1418         if (unlikely(afinfo->family >= NPROTO))
1419                 return -EAFNOSUPPORT;
1420         write_lock_bh(&xfrm_state_afinfo_lock);
1421         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1422                 err = -ENOBUFS;
1423         else
1424                 xfrm_state_afinfo[afinfo->family] = afinfo;
1425         write_unlock_bh(&xfrm_state_afinfo_lock);
1426         return err;
1427 }
1428 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1429
1430 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1431 {
1432         int err = 0;
1433         if (unlikely(afinfo == NULL))
1434                 return -EINVAL;
1435         if (unlikely(afinfo->family >= NPROTO))
1436                 return -EAFNOSUPPORT;
1437         write_lock_bh(&xfrm_state_afinfo_lock);
1438         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1439                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1440                         err = -EINVAL;
1441                 else
1442                         xfrm_state_afinfo[afinfo->family] = NULL;
1443         }
1444         write_unlock_bh(&xfrm_state_afinfo_lock);
1445         return err;
1446 }
1447 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1448
/* Look up the per-family state ops and return them with
 * xfrm_state_afinfo_lock read-held; release via
 * xfrm_state_put_afinfo().  Returns NULL — with the lock already
 * dropped — for an out-of-range or unregistered family. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
        struct xfrm_state_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_state_afinfo_lock);
        afinfo = xfrm_state_afinfo[family];
        /* Drop the lock on failure: callers skip put() when NULL. */
        if (unlikely(!afinfo))
                read_unlock(&xfrm_state_afinfo_lock);
        return afinfo;
}
1460
/* Pairs with a successful xfrm_state_get_afinfo(): releases the read
 * lock taken there.  @afinfo itself is unused. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
        read_unlock(&xfrm_state_afinfo_lock);
}
1465
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop @x's hold on its inner tunnel state, deleting that state when
 * no other SA still uses it. */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
        if (x->tunnel) {
                struct xfrm_state *t = x->tunnel;

                /* NOTE(review): tunnel_users == 2 appears to mean only
                 * @x and the tunnel's creator still hold it — confirm
                 * against the tunnel setup code. */
                if (atomic_read(&t->tunnel_users) == 2)
                        xfrm_state_delete(t);
                atomic_dec(&t->tunnel_users);
                xfrm_state_put(t);
                x->tunnel = NULL;
        }
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1480
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
        int res = mtu;

        res -= x->props.header_len;

        /* Iterate: shrink the payload estimate until the transformed
         * packet size fits within the original mtu.  Never returns
         * less than 68 (the minimum IPv4 MTU). */
        for (;;) {
                int m = res;

                if (m < 68)
                        return 68;

                spin_lock_bh(&x->lock);
                if (x->km.state == XFRM_STATE_VALID &&
                    x->type && x->type->get_max_size)
                        m = x->type->get_max_size(x, m);
                else
                        /* No type hook: assume only header overhead. */
                        m += x->props.header_len;
                spin_unlock_bh(&x->lock);

                if (m <= mtu)
                        break;
                res -= (m - mtu);
        }

        return res;
}
1514
/* Finish constructing a state: apply per-family init flags, bind the
 * protocol type and mode, run the type's init_state(), and finally
 * mark the state VALID.  Returns 0 or a negative errno; on failure
 * the state is left non-VALID for the caller to dispose of. */
int xfrm_init_state(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        int family = x->props.family;
        int err;

        err = -EAFNOSUPPORT;
        afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                goto error;

        err = 0;
        if (afinfo->init_flags)
                err = afinfo->init_flags(x);

        xfrm_state_put_afinfo(afinfo);

        if (err)
                goto error;

        err = -EPROTONOSUPPORT;
        x->type = xfrm_get_type(x->id.proto, family);
        if (x->type == NULL)
                goto error;

        err = x->type->init_state(x);
        if (err)
                goto error;

        x->mode = xfrm_get_mode(x->props.mode, family);
        if (x->mode == NULL)
                goto error;

        x->km.state = XFRM_STATE_VALID;

error:
        return err;
}

EXPORT_SYMBOL(xfrm_init_state);
1555  
/* Boot-time setup: allocate the three SA hash tables with 8 buckets
 * each (they can grow later — see xfrm_hash_grow_check) and set up the
 * state garbage-collector work.  Panics on allocation failure since
 * the state engine cannot run without its tables. */
void __init xfrm_state_init(void)
{
        unsigned int sz;

        sz = sizeof(struct hlist_head) * 8;

        xfrm_state_bydst = xfrm_hash_alloc(sz);
        xfrm_state_bysrc = xfrm_hash_alloc(sz);
        xfrm_state_byspi = xfrm_hash_alloc(sz);
        if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
                panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
        /* Mask for indexing: bucket count is a power of two. */
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

        INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}
1571