]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/xfrm/xfrm_state.c
[IPSEC]: Remove gratuitous km wake-up events on ACQUIRE
[karo-tx-linux.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23
24 #include "xfrm_hash.h"
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 u32 sysctl_xfrm_acq_expires __read_mostly = 30;
36
37 /* Each xfrm_state may be linked to two tables:
38
39    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
40    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
41       destination/tunnel endpoint. (output)
42  */
43
44 static DEFINE_SPINLOCK(xfrm_state_lock);
45
46 /* Hash table to find appropriate SA towards given target (endpoint
47  * of tunnel or destination of transport mode) allowed by selector.
48  *
49  * Main use is finding SA after policy selected tunnel or transport mode.
50  * Also, it can be used by ah/esp icmp error handler to find offending SA.
51  */
52 static struct hlist_head *xfrm_state_bydst __read_mostly;
53 static struct hlist_head *xfrm_state_bysrc __read_mostly;
54 static struct hlist_head *xfrm_state_byspi __read_mostly;
55 static unsigned int xfrm_state_hmask __read_mostly;
56 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
57 static unsigned int xfrm_state_num;
58 static unsigned int xfrm_state_genid;
59
60 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
61                                          xfrm_address_t *saddr,
62                                          u32 reqid,
63                                          unsigned short family)
64 {
65         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
66 }
67
68 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
69                                          xfrm_address_t *saddr,
70                                          unsigned short family)
71 {
72         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
73 }
74
75 static inline unsigned int
76 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
77 {
78         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
79 }
80
81 static void xfrm_hash_transfer(struct hlist_head *list,
82                                struct hlist_head *ndsttable,
83                                struct hlist_head *nsrctable,
84                                struct hlist_head *nspitable,
85                                unsigned int nhashmask)
86 {
87         struct hlist_node *entry, *tmp;
88         struct xfrm_state *x;
89
90         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
91                 unsigned int h;
92
93                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
94                                     x->props.reqid, x->props.family,
95                                     nhashmask);
96                 hlist_add_head(&x->bydst, ndsttable+h);
97
98                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
99                                     x->props.family,
100                                     nhashmask);
101                 hlist_add_head(&x->bysrc, nsrctable+h);
102
103                 if (x->id.spi) {
104                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
105                                             x->id.proto, x->props.family,
106                                             nhashmask);
107                         hlist_add_head(&x->byspi, nspitable+h);
108                 }
109         }
110 }
111
112 static unsigned long xfrm_hash_new_size(void)
113 {
114         return ((xfrm_state_hmask + 1) << 1) *
115                 sizeof(struct hlist_head);
116 }
117
118 static DEFINE_MUTEX(hash_resize_mutex);
119
120 static void xfrm_hash_resize(struct work_struct *__unused)
121 {
122         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
123         unsigned long nsize, osize;
124         unsigned int nhashmask, ohashmask;
125         int i;
126
127         mutex_lock(&hash_resize_mutex);
128
129         nsize = xfrm_hash_new_size();
130         ndst = xfrm_hash_alloc(nsize);
131         if (!ndst)
132                 goto out_unlock;
133         nsrc = xfrm_hash_alloc(nsize);
134         if (!nsrc) {
135                 xfrm_hash_free(ndst, nsize);
136                 goto out_unlock;
137         }
138         nspi = xfrm_hash_alloc(nsize);
139         if (!nspi) {
140                 xfrm_hash_free(ndst, nsize);
141                 xfrm_hash_free(nsrc, nsize);
142                 goto out_unlock;
143         }
144
145         spin_lock_bh(&xfrm_state_lock);
146
147         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
148         for (i = xfrm_state_hmask; i >= 0; i--)
149                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
150                                    nhashmask);
151
152         odst = xfrm_state_bydst;
153         osrc = xfrm_state_bysrc;
154         ospi = xfrm_state_byspi;
155         ohashmask = xfrm_state_hmask;
156
157         xfrm_state_bydst = ndst;
158         xfrm_state_bysrc = nsrc;
159         xfrm_state_byspi = nspi;
160         xfrm_state_hmask = nhashmask;
161
162         spin_unlock_bh(&xfrm_state_lock);
163
164         osize = (ohashmask + 1) * sizeof(struct hlist_head);
165         xfrm_hash_free(odst, osize);
166         xfrm_hash_free(osrc, osize);
167         xfrm_hash_free(ospi, osize);
168
169 out_unlock:
170         mutex_unlock(&hash_resize_mutex);
171 }
172
173 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
174
175 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
176 EXPORT_SYMBOL(km_waitq);
177
178 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
179 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
180
181 static struct work_struct xfrm_state_gc_work;
182 static HLIST_HEAD(xfrm_state_gc_list);
183 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
184
185 int __xfrm_state_delete(struct xfrm_state *x);
186
187 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
188 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
189
190 static void xfrm_state_gc_destroy(struct xfrm_state *x)
191 {
192         del_timer_sync(&x->timer);
193         del_timer_sync(&x->rtimer);
194         kfree(x->aalg);
195         kfree(x->ealg);
196         kfree(x->calg);
197         kfree(x->encap);
198         kfree(x->coaddr);
199         if (x->mode)
200                 xfrm_put_mode(x->mode);
201         if (x->type) {
202                 x->type->destructor(x);
203                 xfrm_put_type(x->type);
204         }
205         security_xfrm_state_free(x);
206         kfree(x);
207 }
208
209 static void xfrm_state_gc_task(struct work_struct *data)
210 {
211         struct xfrm_state *x;
212         struct hlist_node *entry, *tmp;
213         struct hlist_head gc_list;
214
215         spin_lock_bh(&xfrm_state_gc_lock);
216         gc_list.first = xfrm_state_gc_list.first;
217         INIT_HLIST_HEAD(&xfrm_state_gc_list);
218         spin_unlock_bh(&xfrm_state_gc_lock);
219
220         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
221                 xfrm_state_gc_destroy(x);
222
223         wake_up(&km_waitq);
224 }
225
226 static inline unsigned long make_jiffies(long secs)
227 {
228         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
229                 return MAX_SCHEDULE_TIMEOUT-1;
230         else
231                 return secs*HZ;
232 }
233
234 static void xfrm_timer_handler(unsigned long data)
235 {
236         struct xfrm_state *x = (struct xfrm_state*)data;
237         unsigned long now = get_seconds();
238         long next = LONG_MAX;
239         int warn = 0;
240         int err = 0;
241
242         spin_lock(&x->lock);
243         if (x->km.state == XFRM_STATE_DEAD)
244                 goto out;
245         if (x->km.state == XFRM_STATE_EXPIRED)
246                 goto expired;
247         if (x->lft.hard_add_expires_seconds) {
248                 long tmo = x->lft.hard_add_expires_seconds +
249                         x->curlft.add_time - now;
250                 if (tmo <= 0)
251                         goto expired;
252                 if (tmo < next)
253                         next = tmo;
254         }
255         if (x->lft.hard_use_expires_seconds) {
256                 long tmo = x->lft.hard_use_expires_seconds +
257                         (x->curlft.use_time ? : now) - now;
258                 if (tmo <= 0)
259                         goto expired;
260                 if (tmo < next)
261                         next = tmo;
262         }
263         if (x->km.dying)
264                 goto resched;
265         if (x->lft.soft_add_expires_seconds) {
266                 long tmo = x->lft.soft_add_expires_seconds +
267                         x->curlft.add_time - now;
268                 if (tmo <= 0)
269                         warn = 1;
270                 else if (tmo < next)
271                         next = tmo;
272         }
273         if (x->lft.soft_use_expires_seconds) {
274                 long tmo = x->lft.soft_use_expires_seconds +
275                         (x->curlft.use_time ? : now) - now;
276                 if (tmo <= 0)
277                         warn = 1;
278                 else if (tmo < next)
279                         next = tmo;
280         }
281
282         x->km.dying = warn;
283         if (warn)
284                 km_state_expired(x, 0, 0);
285 resched:
286         if (next != LONG_MAX)
287                 mod_timer(&x->timer, jiffies + make_jiffies(next));
288
289         goto out;
290
291 expired:
292         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
293                 x->km.state = XFRM_STATE_EXPIRED;
294                 wake_up(&km_waitq);
295                 next = 2;
296                 goto resched;
297         }
298
299         err = __xfrm_state_delete(x);
300         if (!err && x->id.spi)
301                 km_state_expired(x, 1, 0);
302
303         xfrm_audit_state_delete(x, err ? 0 : 1,
304                                 audit_get_loginuid(current->audit_context), 0);
305
306 out:
307         spin_unlock(&x->lock);
308 }
309
310 static void xfrm_replay_timer_handler(unsigned long data);
311
312 struct xfrm_state *xfrm_state_alloc(void)
313 {
314         struct xfrm_state *x;
315
316         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
317
318         if (x) {
319                 atomic_set(&x->refcnt, 1);
320                 atomic_set(&x->tunnel_users, 0);
321                 INIT_HLIST_NODE(&x->bydst);
322                 INIT_HLIST_NODE(&x->bysrc);
323                 INIT_HLIST_NODE(&x->byspi);
324                 init_timer(&x->timer);
325                 x->timer.function = xfrm_timer_handler;
326                 x->timer.data     = (unsigned long)x;
327                 init_timer(&x->rtimer);
328                 x->rtimer.function = xfrm_replay_timer_handler;
329                 x->rtimer.data     = (unsigned long)x;
330                 x->curlft.add_time = get_seconds();
331                 x->lft.soft_byte_limit = XFRM_INF;
332                 x->lft.soft_packet_limit = XFRM_INF;
333                 x->lft.hard_byte_limit = XFRM_INF;
334                 x->lft.hard_packet_limit = XFRM_INF;
335                 x->replay_maxage = 0;
336                 x->replay_maxdiff = 0;
337                 spin_lock_init(&x->lock);
338         }
339         return x;
340 }
341 EXPORT_SYMBOL(xfrm_state_alloc);
342
343 void __xfrm_state_destroy(struct xfrm_state *x)
344 {
345         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
346
347         spin_lock_bh(&xfrm_state_gc_lock);
348         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
349         spin_unlock_bh(&xfrm_state_gc_lock);
350         schedule_work(&xfrm_state_gc_work);
351 }
352 EXPORT_SYMBOL(__xfrm_state_destroy);
353
354 int __xfrm_state_delete(struct xfrm_state *x)
355 {
356         int err = -ESRCH;
357
358         if (x->km.state != XFRM_STATE_DEAD) {
359                 x->km.state = XFRM_STATE_DEAD;
360                 spin_lock(&xfrm_state_lock);
361                 hlist_del(&x->bydst);
362                 hlist_del(&x->bysrc);
363                 if (x->id.spi)
364                         hlist_del(&x->byspi);
365                 xfrm_state_num--;
366                 spin_unlock(&xfrm_state_lock);
367
368                 /* All xfrm_state objects are created by xfrm_state_alloc.
369                  * The xfrm_state_alloc call gives a reference, and that
370                  * is what we are dropping here.
371                  */
372                 __xfrm_state_put(x);
373                 err = 0;
374         }
375
376         return err;
377 }
378 EXPORT_SYMBOL(__xfrm_state_delete);
379
380 int xfrm_state_delete(struct xfrm_state *x)
381 {
382         int err;
383
384         spin_lock_bh(&x->lock);
385         err = __xfrm_state_delete(x);
386         spin_unlock_bh(&x->lock);
387
388         return err;
389 }
390 EXPORT_SYMBOL(xfrm_state_delete);
391
392 #ifdef CONFIG_SECURITY_NETWORK_XFRM
393 static inline int
394 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
395 {
396         int i, err = 0;
397
398         for (i = 0; i <= xfrm_state_hmask; i++) {
399                 struct hlist_node *entry;
400                 struct xfrm_state *x;
401
402                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
403                         if (xfrm_id_proto_match(x->id.proto, proto) &&
404                            (err = security_xfrm_state_delete(x)) != 0) {
405                                 xfrm_audit_state_delete(x, 0,
406                                                         audit_info->loginuid,
407                                                         audit_info->secid);
408                                 return err;
409                         }
410                 }
411         }
412
413         return err;
414 }
415 #else
416 static inline int
417 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
418 {
419         return 0;
420 }
421 #endif
422
423 int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
424 {
425         int i, err = 0;
426
427         spin_lock_bh(&xfrm_state_lock);
428         err = xfrm_state_flush_secctx_check(proto, audit_info);
429         if (err)
430                 goto out;
431
432         for (i = 0; i <= xfrm_state_hmask; i++) {
433                 struct hlist_node *entry;
434                 struct xfrm_state *x;
435 restart:
436                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
437                         if (!xfrm_state_kern(x) &&
438                             xfrm_id_proto_match(x->id.proto, proto)) {
439                                 xfrm_state_hold(x);
440                                 spin_unlock_bh(&xfrm_state_lock);
441
442                                 err = xfrm_state_delete(x);
443                                 xfrm_audit_state_delete(x, err ? 0 : 1,
444                                                         audit_info->loginuid,
445                                                         audit_info->secid);
446                                 xfrm_state_put(x);
447
448                                 spin_lock_bh(&xfrm_state_lock);
449                                 goto restart;
450                         }
451                 }
452         }
453         err = 0;
454
455 out:
456         spin_unlock_bh(&xfrm_state_lock);
457         wake_up(&km_waitq);
458         return err;
459 }
460 EXPORT_SYMBOL(xfrm_state_flush);
461
462 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
463 {
464         spin_lock_bh(&xfrm_state_lock);
465         si->sadcnt = xfrm_state_num;
466         si->sadhcnt = xfrm_state_hmask;
467         si->sadhmcnt = xfrm_state_hashmax;
468         spin_unlock_bh(&xfrm_state_lock);
469 }
470 EXPORT_SYMBOL(xfrm_sad_getinfo);
471
472 static int
473 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
474                   struct xfrm_tmpl *tmpl,
475                   xfrm_address_t *daddr, xfrm_address_t *saddr,
476                   unsigned short family)
477 {
478         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
479         if (!afinfo)
480                 return -1;
481         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
482         xfrm_state_put_afinfo(afinfo);
483         return 0;
484 }
485
486 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
487 {
488         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
489         struct xfrm_state *x;
490         struct hlist_node *entry;
491
492         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
493                 if (x->props.family != family ||
494                     x->id.spi       != spi ||
495                     x->id.proto     != proto)
496                         continue;
497
498                 switch (family) {
499                 case AF_INET:
500                         if (x->id.daddr.a4 != daddr->a4)
501                                 continue;
502                         break;
503                 case AF_INET6:
504                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
505                                              (struct in6_addr *)
506                                              x->id.daddr.a6))
507                                 continue;
508                         break;
509                 }
510
511                 xfrm_state_hold(x);
512                 return x;
513         }
514
515         return NULL;
516 }
517
518 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
519 {
520         unsigned int h = xfrm_src_hash(daddr, saddr, family);
521         struct xfrm_state *x;
522         struct hlist_node *entry;
523
524         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
525                 if (x->props.family != family ||
526                     x->id.proto     != proto)
527                         continue;
528
529                 switch (family) {
530                 case AF_INET:
531                         if (x->id.daddr.a4 != daddr->a4 ||
532                             x->props.saddr.a4 != saddr->a4)
533                                 continue;
534                         break;
535                 case AF_INET6:
536                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
537                                              (struct in6_addr *)
538                                              x->id.daddr.a6) ||
539                             !ipv6_addr_equal((struct in6_addr *)saddr,
540                                              (struct in6_addr *)
541                                              x->props.saddr.a6))
542                                 continue;
543                         break;
544                 }
545
546                 xfrm_state_hold(x);
547                 return x;
548         }
549
550         return NULL;
551 }
552
553 static inline struct xfrm_state *
554 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
555 {
556         if (use_spi)
557                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
558                                            x->id.proto, family);
559         else
560                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
561                                                   &x->props.saddr,
562                                                   x->id.proto, family);
563 }
564
565 static void xfrm_hash_grow_check(int have_hash_collision)
566 {
567         if (have_hash_collision &&
568             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
569             xfrm_state_num > xfrm_state_hmask)
570                 schedule_work(&xfrm_hash_work);
571 }
572
573 struct xfrm_state *
574 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
575                 struct flowi *fl, struct xfrm_tmpl *tmpl,
576                 struct xfrm_policy *pol, int *err,
577                 unsigned short family)
578 {
579         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
580         struct hlist_node *entry;
581         struct xfrm_state *x, *x0;
582         int acquire_in_progress = 0;
583         int error = 0;
584         struct xfrm_state *best = NULL;
585
586         spin_lock_bh(&xfrm_state_lock);
587         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
588                 if (x->props.family == family &&
589                     x->props.reqid == tmpl->reqid &&
590                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
591                     xfrm_state_addr_check(x, daddr, saddr, family) &&
592                     tmpl->mode == x->props.mode &&
593                     tmpl->id.proto == x->id.proto &&
594                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
595                         /* Resolution logic:
596                            1. There is a valid state with matching selector.
597                               Done.
598                            2. Valid state with inappropriate selector. Skip.
599
600                            Entering area of "sysdeps".
601
602                            3. If state is not valid, selector is temporary,
603                               it selects only session which triggered
604                               previous resolution. Key manager will do
605                               something to install a state with proper
606                               selector.
607                          */
608                         if (x->km.state == XFRM_STATE_VALID) {
609                                 if (!xfrm_selector_match(&x->sel, fl, x->sel.family) ||
610                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
611                                         continue;
612                                 if (!best ||
613                                     best->km.dying > x->km.dying ||
614                                     (best->km.dying == x->km.dying &&
615                                      best->curlft.add_time < x->curlft.add_time))
616                                         best = x;
617                         } else if (x->km.state == XFRM_STATE_ACQ) {
618                                 acquire_in_progress = 1;
619                         } else if (x->km.state == XFRM_STATE_ERROR ||
620                                    x->km.state == XFRM_STATE_EXPIRED) {
621                                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
622                                     security_xfrm_state_pol_flow_match(x, pol, fl))
623                                         error = -ESRCH;
624                         }
625                 }
626         }
627
628         x = best;
629         if (!x && !error && !acquire_in_progress) {
630                 if (tmpl->id.spi &&
631                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
632                                               tmpl->id.proto, family)) != NULL) {
633                         xfrm_state_put(x0);
634                         error = -EEXIST;
635                         goto out;
636                 }
637                 x = xfrm_state_alloc();
638                 if (x == NULL) {
639                         error = -ENOMEM;
640                         goto out;
641                 }
642                 /* Initialize temporary selector matching only
643                  * to current session. */
644                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
645
646                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
647                 if (error) {
648                         x->km.state = XFRM_STATE_DEAD;
649                         xfrm_state_put(x);
650                         x = NULL;
651                         goto out;
652                 }
653
654                 if (km_query(x, tmpl, pol) == 0) {
655                         x->km.state = XFRM_STATE_ACQ;
656                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
657                         h = xfrm_src_hash(daddr, saddr, family);
658                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
659                         if (x->id.spi) {
660                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
661                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
662                         }
663                         x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
664                         x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
665                         add_timer(&x->timer);
666                         xfrm_state_num++;
667                         xfrm_hash_grow_check(x->bydst.next != NULL);
668                 } else {
669                         x->km.state = XFRM_STATE_DEAD;
670                         xfrm_state_put(x);
671                         x = NULL;
672                         error = -ESRCH;
673                 }
674         }
675 out:
676         if (x)
677                 xfrm_state_hold(x);
678         else
679                 *err = acquire_in_progress ? -EAGAIN : error;
680         spin_unlock_bh(&xfrm_state_lock);
681         return x;
682 }
683
684 struct xfrm_state *
685 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
686                     unsigned short family, u8 mode, u8 proto, u32 reqid)
687 {
688         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
689         struct xfrm_state *rx = NULL, *x = NULL;
690         struct hlist_node *entry;
691
692         spin_lock(&xfrm_state_lock);
693         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
694                 if (x->props.family == family &&
695                     x->props.reqid == reqid &&
696                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
697                     xfrm_state_addr_check(x, daddr, saddr, family) &&
698                     mode == x->props.mode &&
699                     proto == x->id.proto &&
700                     x->km.state == XFRM_STATE_VALID) {
701                         rx = x;
702                         break;
703                 }
704         }
705
706         if (rx)
707                 xfrm_state_hold(rx);
708         spin_unlock(&xfrm_state_lock);
709
710
711         return rx;
712 }
713 EXPORT_SYMBOL(xfrm_stateonly_find);
714
715 static void __xfrm_state_insert(struct xfrm_state *x)
716 {
717         unsigned int h;
718
719         x->genid = ++xfrm_state_genid;
720
721         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
722                           x->props.reqid, x->props.family);
723         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
724
725         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
726         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
727
728         if (x->id.spi) {
729                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
730                                   x->props.family);
731
732                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
733         }
734
735         mod_timer(&x->timer, jiffies + HZ);
736         if (x->replay_maxage)
737                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
738
739         wake_up(&km_waitq);
740
741         xfrm_state_num++;
742
743         xfrm_hash_grow_check(x->bydst.next != NULL);
744 }
745
746 /* xfrm_state_lock is held */
747 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
748 {
749         unsigned short family = xnew->props.family;
750         u32 reqid = xnew->props.reqid;
751         struct xfrm_state *x;
752         struct hlist_node *entry;
753         unsigned int h;
754
755         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
756         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
757                 if (x->props.family     == family &&
758                     x->props.reqid      == reqid &&
759                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
760                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
761                         x->genid = xfrm_state_genid;
762         }
763 }
764
765 void xfrm_state_insert(struct xfrm_state *x)
766 {
767         spin_lock_bh(&xfrm_state_lock);
768         __xfrm_state_bump_genids(x);
769         __xfrm_state_insert(x);
770         spin_unlock_bh(&xfrm_state_lock);
771 }
772 EXPORT_SYMBOL(xfrm_state_insert);
773
774 /* xfrm_state_lock is held */
775 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
776 {
777         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
778         struct hlist_node *entry;
779         struct xfrm_state *x;
780
781         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
782                 if (x->props.reqid  != reqid ||
783                     x->props.mode   != mode ||
784                     x->props.family != family ||
785                     x->km.state     != XFRM_STATE_ACQ ||
786                     x->id.spi       != 0 ||
787                     x->id.proto     != proto)
788                         continue;
789
790                 switch (family) {
791                 case AF_INET:
792                         if (x->id.daddr.a4    != daddr->a4 ||
793                             x->props.saddr.a4 != saddr->a4)
794                                 continue;
795                         break;
796                 case AF_INET6:
797                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
798                                              (struct in6_addr *)daddr) ||
799                             !ipv6_addr_equal((struct in6_addr *)
800                                              x->props.saddr.a6,
801                                              (struct in6_addr *)saddr))
802                                 continue;
803                         break;
804                 }
805
806                 xfrm_state_hold(x);
807                 return x;
808         }
809
810         if (!create)
811                 return NULL;
812
813         x = xfrm_state_alloc();
814         if (likely(x)) {
815                 switch (family) {
816                 case AF_INET:
817                         x->sel.daddr.a4 = daddr->a4;
818                         x->sel.saddr.a4 = saddr->a4;
819                         x->sel.prefixlen_d = 32;
820                         x->sel.prefixlen_s = 32;
821                         x->props.saddr.a4 = saddr->a4;
822                         x->id.daddr.a4 = daddr->a4;
823                         break;
824
825                 case AF_INET6:
826                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
827                                        (struct in6_addr *)daddr);
828                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
829                                        (struct in6_addr *)saddr);
830                         x->sel.prefixlen_d = 128;
831                         x->sel.prefixlen_s = 128;
832                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
833                                        (struct in6_addr *)saddr);
834                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
835                                        (struct in6_addr *)daddr);
836                         break;
837                 }
838
839                 x->km.state = XFRM_STATE_ACQ;
840                 x->id.proto = proto;
841                 x->props.family = family;
842                 x->props.mode = mode;
843                 x->props.reqid = reqid;
844                 x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
845                 xfrm_state_hold(x);
846                 x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
847                 add_timer(&x->timer);
848                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
849                 h = xfrm_src_hash(daddr, saddr, family);
850                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
851
852                 xfrm_state_num++;
853
854                 xfrm_hash_grow_check(x->bydst.next != NULL);
855         }
856
857         return x;
858 }
859
860 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
861
862 int xfrm_state_add(struct xfrm_state *x)
863 {
864         struct xfrm_state *x1;
865         int family;
866         int err;
867         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
868
869         family = x->props.family;
870
871         spin_lock_bh(&xfrm_state_lock);
872
873         x1 = __xfrm_state_locate(x, use_spi, family);
874         if (x1) {
875                 xfrm_state_put(x1);
876                 x1 = NULL;
877                 err = -EEXIST;
878                 goto out;
879         }
880
881         if (use_spi && x->km.seq) {
882                 x1 = __xfrm_find_acq_byseq(x->km.seq);
883                 if (x1 && ((x1->id.proto != x->id.proto) ||
884                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
885                         xfrm_state_put(x1);
886                         x1 = NULL;
887                 }
888         }
889
890         if (use_spi && !x1)
891                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
892                                      x->id.proto,
893                                      &x->id.daddr, &x->props.saddr, 0);
894
895         __xfrm_state_bump_genids(x);
896         __xfrm_state_insert(x);
897         err = 0;
898
899 out:
900         spin_unlock_bh(&xfrm_state_lock);
901
902         if (x1) {
903                 xfrm_state_delete(x1);
904                 xfrm_state_put(x1);
905         }
906
907         return err;
908 }
909 EXPORT_SYMBOL(xfrm_state_add);
910
911 #ifdef CONFIG_XFRM_MIGRATE
912 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
913 {
914         int err = -ENOMEM;
915         struct xfrm_state *x = xfrm_state_alloc();
916         if (!x)
917                 goto error;
918
919         memcpy(&x->id, &orig->id, sizeof(x->id));
920         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
921         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
922         x->props.mode = orig->props.mode;
923         x->props.replay_window = orig->props.replay_window;
924         x->props.reqid = orig->props.reqid;
925         x->props.family = orig->props.family;
926         x->props.saddr = orig->props.saddr;
927
928         if (orig->aalg) {
929                 x->aalg = xfrm_algo_clone(orig->aalg);
930                 if (!x->aalg)
931                         goto error;
932         }
933         x->props.aalgo = orig->props.aalgo;
934
935         if (orig->ealg) {
936                 x->ealg = xfrm_algo_clone(orig->ealg);
937                 if (!x->ealg)
938                         goto error;
939         }
940         x->props.ealgo = orig->props.ealgo;
941
942         if (orig->calg) {
943                 x->calg = xfrm_algo_clone(orig->calg);
944                 if (!x->calg)
945                         goto error;
946         }
947         x->props.calgo = orig->props.calgo;
948
949         if (orig->encap) {
950                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
951                 if (!x->encap)
952                         goto error;
953         }
954
955         if (orig->coaddr) {
956                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
957                                     GFP_KERNEL);
958                 if (!x->coaddr)
959                         goto error;
960         }
961
962         err = xfrm_init_state(x);
963         if (err)
964                 goto error;
965
966         x->props.flags = orig->props.flags;
967
968         x->curlft.add_time = orig->curlft.add_time;
969         x->km.state = orig->km.state;
970         x->km.seq = orig->km.seq;
971
972         return x;
973
974  error:
975         if (errp)
976                 *errp = err;
977         if (x) {
978                 kfree(x->aalg);
979                 kfree(x->ealg);
980                 kfree(x->calg);
981                 kfree(x->encap);
982                 kfree(x->coaddr);
983         }
984         kfree(x);
985         return NULL;
986 }
987 EXPORT_SYMBOL(xfrm_state_clone);
988
989 /* xfrm_state_lock is held */
990 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
991 {
992         unsigned int h;
993         struct xfrm_state *x;
994         struct hlist_node *entry;
995
996         if (m->reqid) {
997                 h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
998                                   m->reqid, m->old_family);
999                 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
1000                         if (x->props.mode != m->mode ||
1001                             x->id.proto != m->proto)
1002                                 continue;
1003                         if (m->reqid && x->props.reqid != m->reqid)
1004                                 continue;
1005                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1006                                           m->old_family) ||
1007                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1008                                           m->old_family))
1009                                 continue;
1010                         xfrm_state_hold(x);
1011                         return x;
1012                 }
1013         } else {
1014                 h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
1015                                   m->old_family);
1016                 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
1017                         if (x->props.mode != m->mode ||
1018                             x->id.proto != m->proto)
1019                                 continue;
1020                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1021                                           m->old_family) ||
1022                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1023                                           m->old_family))
1024                                 continue;
1025                         xfrm_state_hold(x);
1026                         return x;
1027                 }
1028         }
1029
1030         return NULL;
1031 }
1032 EXPORT_SYMBOL(xfrm_migrate_state_find);
1033
1034 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1035                                        struct xfrm_migrate *m)
1036 {
1037         struct xfrm_state *xc;
1038         int err;
1039
1040         xc = xfrm_state_clone(x, &err);
1041         if (!xc)
1042                 return NULL;
1043
1044         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1045         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1046
1047         /* add state */
1048         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1049                 /* a care is needed when the destination address of the
1050                    state is to be updated as it is a part of triplet */
1051                 xfrm_state_insert(xc);
1052         } else {
1053                 if ((err = xfrm_state_add(xc)) < 0)
1054                         goto error;
1055         }
1056
1057         return xc;
1058 error:
1059         kfree(xc);
1060         return NULL;
1061 }
1062 EXPORT_SYMBOL(xfrm_state_migrate);
1063 #endif
1064
1065 int xfrm_state_update(struct xfrm_state *x)
1066 {
1067         struct xfrm_state *x1;
1068         int err;
1069         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1070
1071         spin_lock_bh(&xfrm_state_lock);
1072         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1073
1074         err = -ESRCH;
1075         if (!x1)
1076                 goto out;
1077
1078         if (xfrm_state_kern(x1)) {
1079                 xfrm_state_put(x1);
1080                 err = -EEXIST;
1081                 goto out;
1082         }
1083
1084         if (x1->km.state == XFRM_STATE_ACQ) {
1085                 __xfrm_state_insert(x);
1086                 x = NULL;
1087         }
1088         err = 0;
1089
1090 out:
1091         spin_unlock_bh(&xfrm_state_lock);
1092
1093         if (err)
1094                 return err;
1095
1096         if (!x) {
1097                 xfrm_state_delete(x1);
1098                 xfrm_state_put(x1);
1099                 return 0;
1100         }
1101
1102         err = -EINVAL;
1103         spin_lock_bh(&x1->lock);
1104         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1105                 if (x->encap && x1->encap)
1106                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1107                 if (x->coaddr && x1->coaddr) {
1108                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1109                 }
1110                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1111                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1112                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1113                 x1->km.dying = 0;
1114
1115                 mod_timer(&x1->timer, jiffies + HZ);
1116                 if (x1->curlft.use_time)
1117                         xfrm_state_check_expire(x1);
1118
1119                 err = 0;
1120         }
1121         spin_unlock_bh(&x1->lock);
1122
1123         xfrm_state_put(x1);
1124
1125         return err;
1126 }
1127 EXPORT_SYMBOL(xfrm_state_update);
1128
1129 int xfrm_state_check_expire(struct xfrm_state *x)
1130 {
1131         if (!x->curlft.use_time)
1132                 x->curlft.use_time = get_seconds();
1133
1134         if (x->km.state != XFRM_STATE_VALID)
1135                 return -EINVAL;
1136
1137         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1138             x->curlft.packets >= x->lft.hard_packet_limit) {
1139                 x->km.state = XFRM_STATE_EXPIRED;
1140                 mod_timer(&x->timer, jiffies);
1141                 return -EINVAL;
1142         }
1143
1144         if (!x->km.dying &&
1145             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1146              x->curlft.packets >= x->lft.soft_packet_limit)) {
1147                 x->km.dying = 1;
1148                 km_state_expired(x, 0, 0);
1149         }
1150         return 0;
1151 }
1152 EXPORT_SYMBOL(xfrm_state_check_expire);
1153
1154 struct xfrm_state *
1155 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1156                   unsigned short family)
1157 {
1158         struct xfrm_state *x;
1159
1160         spin_lock_bh(&xfrm_state_lock);
1161         x = __xfrm_state_lookup(daddr, spi, proto, family);
1162         spin_unlock_bh(&xfrm_state_lock);
1163         return x;
1164 }
1165 EXPORT_SYMBOL(xfrm_state_lookup);
1166
1167 struct xfrm_state *
1168 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1169                          u8 proto, unsigned short family)
1170 {
1171         struct xfrm_state *x;
1172
1173         spin_lock_bh(&xfrm_state_lock);
1174         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1175         spin_unlock_bh(&xfrm_state_lock);
1176         return x;
1177 }
1178 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1179
1180 struct xfrm_state *
1181 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1182               xfrm_address_t *daddr, xfrm_address_t *saddr,
1183               int create, unsigned short family)
1184 {
1185         struct xfrm_state *x;
1186
1187         spin_lock_bh(&xfrm_state_lock);
1188         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1189         spin_unlock_bh(&xfrm_state_lock);
1190
1191         return x;
1192 }
1193 EXPORT_SYMBOL(xfrm_find_acq);
1194
1195 #ifdef CONFIG_XFRM_SUB_POLICY
1196 int
1197 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1198                unsigned short family)
1199 {
1200         int err = 0;
1201         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1202         if (!afinfo)
1203                 return -EAFNOSUPPORT;
1204
1205         spin_lock_bh(&xfrm_state_lock);
1206         if (afinfo->tmpl_sort)
1207                 err = afinfo->tmpl_sort(dst, src, n);
1208         spin_unlock_bh(&xfrm_state_lock);
1209         xfrm_state_put_afinfo(afinfo);
1210         return err;
1211 }
1212 EXPORT_SYMBOL(xfrm_tmpl_sort);
1213
1214 int
1215 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1216                 unsigned short family)
1217 {
1218         int err = 0;
1219         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1220         if (!afinfo)
1221                 return -EAFNOSUPPORT;
1222
1223         spin_lock_bh(&xfrm_state_lock);
1224         if (afinfo->state_sort)
1225                 err = afinfo->state_sort(dst, src, n);
1226         spin_unlock_bh(&xfrm_state_lock);
1227         xfrm_state_put_afinfo(afinfo);
1228         return err;
1229 }
1230 EXPORT_SYMBOL(xfrm_state_sort);
1231 #endif
1232
1233 /* Silly enough, but I'm lazy to build resolution list */
1234
1235 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1236 {
1237         int i;
1238
1239         for (i = 0; i <= xfrm_state_hmask; i++) {
1240                 struct hlist_node *entry;
1241                 struct xfrm_state *x;
1242
1243                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1244                         if (x->km.seq == seq &&
1245                             x->km.state == XFRM_STATE_ACQ) {
1246                                 xfrm_state_hold(x);
1247                                 return x;
1248                         }
1249                 }
1250         }
1251         return NULL;
1252 }
1253
1254 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1255 {
1256         struct xfrm_state *x;
1257
1258         spin_lock_bh(&xfrm_state_lock);
1259         x = __xfrm_find_acq_byseq(seq);
1260         spin_unlock_bh(&xfrm_state_lock);
1261         return x;
1262 }
1263 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1264
1265 u32 xfrm_get_acqseq(void)
1266 {
1267         u32 res;
1268         static u32 acqseq;
1269         static DEFINE_SPINLOCK(acqseq_lock);
1270
1271         spin_lock_bh(&acqseq_lock);
1272         res = (++acqseq ? : ++acqseq);
1273         spin_unlock_bh(&acqseq_lock);
1274         return res;
1275 }
1276 EXPORT_SYMBOL(xfrm_get_acqseq);
1277
1278 void
1279 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1280 {
1281         unsigned int h;
1282         struct xfrm_state *x0;
1283
1284         if (x->id.spi)
1285                 return;
1286
1287         if (minspi == maxspi) {
1288                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1289                 if (x0) {
1290                         xfrm_state_put(x0);
1291                         return;
1292                 }
1293                 x->id.spi = minspi;
1294         } else {
1295                 u32 spi = 0;
1296                 u32 low = ntohl(minspi);
1297                 u32 high = ntohl(maxspi);
1298                 for (h=0; h<high-low+1; h++) {
1299                         spi = low + net_random()%(high-low+1);
1300                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1301                         if (x0 == NULL) {
1302                                 x->id.spi = htonl(spi);
1303                                 break;
1304                         }
1305                         xfrm_state_put(x0);
1306                 }
1307         }
1308         if (x->id.spi) {
1309                 spin_lock_bh(&xfrm_state_lock);
1310                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1311                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1312                 spin_unlock_bh(&xfrm_state_lock);
1313         }
1314 }
1315 EXPORT_SYMBOL(xfrm_alloc_spi);
1316
1317 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1318                     void *data)
1319 {
1320         int i;
1321         struct xfrm_state *x, *last = NULL;
1322         struct hlist_node *entry;
1323         int count = 0;
1324         int err = 0;
1325
1326         spin_lock_bh(&xfrm_state_lock);
1327         for (i = 0; i <= xfrm_state_hmask; i++) {
1328                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1329                         if (!xfrm_id_proto_match(x->id.proto, proto))
1330                                 continue;
1331                         if (last) {
1332                                 err = func(last, count, data);
1333                                 if (err)
1334                                         goto out;
1335                         }
1336                         last = x;
1337                         count++;
1338                 }
1339         }
1340         if (count == 0) {
1341                 err = -ENOENT;
1342                 goto out;
1343         }
1344         err = func(last, 0, data);
1345 out:
1346         spin_unlock_bh(&xfrm_state_lock);
1347         return err;
1348 }
1349 EXPORT_SYMBOL(xfrm_state_walk);
1350
1351
1352 void xfrm_replay_notify(struct xfrm_state *x, int event)
1353 {
1354         struct km_event c;
1355         /* we send notify messages in case
1356          *  1. we updated on of the sequence numbers, and the seqno difference
1357          *     is at least x->replay_maxdiff, in this case we also update the
1358          *     timeout of our timer function
1359          *  2. if x->replay_maxage has elapsed since last update,
1360          *     and there were changes
1361          *
1362          *  The state structure must be locked!
1363          */
1364
1365         switch (event) {
1366         case XFRM_REPLAY_UPDATE:
1367                 if (x->replay_maxdiff &&
1368                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1369                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1370                         if (x->xflags & XFRM_TIME_DEFER)
1371                                 event = XFRM_REPLAY_TIMEOUT;
1372                         else
1373                                 return;
1374                 }
1375
1376                 break;
1377
1378         case XFRM_REPLAY_TIMEOUT:
1379                 if ((x->replay.seq == x->preplay.seq) &&
1380                     (x->replay.bitmap == x->preplay.bitmap) &&
1381                     (x->replay.oseq == x->preplay.oseq)) {
1382                         x->xflags |= XFRM_TIME_DEFER;
1383                         return;
1384                 }
1385
1386                 break;
1387         }
1388
1389         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1390         c.event = XFRM_MSG_NEWAE;
1391         c.data.aevent = event;
1392         km_state_notify(x, &c);
1393
1394         if (x->replay_maxage &&
1395             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1396                 x->xflags &= ~XFRM_TIME_DEFER;
1397 }
1398
1399 static void xfrm_replay_timer_handler(unsigned long data)
1400 {
1401         struct xfrm_state *x = (struct xfrm_state*)data;
1402
1403         spin_lock(&x->lock);
1404
1405         if (x->km.state == XFRM_STATE_VALID) {
1406                 if (xfrm_aevent_is_on())
1407                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1408                 else
1409                         x->xflags |= XFRM_TIME_DEFER;
1410         }
1411
1412         spin_unlock(&x->lock);
1413 }
1414
1415 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1416 {
1417         u32 diff;
1418         u32 seq = ntohl(net_seq);
1419
1420         if (unlikely(seq == 0))
1421                 return -EINVAL;
1422
1423         if (likely(seq > x->replay.seq))
1424                 return 0;
1425
1426         diff = x->replay.seq - seq;
1427         if (diff >= min_t(unsigned int, x->props.replay_window,
1428                           sizeof(x->replay.bitmap) * 8)) {
1429                 x->stats.replay_window++;
1430                 return -EINVAL;
1431         }
1432
1433         if (x->replay.bitmap & (1U << diff)) {
1434                 x->stats.replay++;
1435                 return -EINVAL;
1436         }
1437         return 0;
1438 }
1439 EXPORT_SYMBOL(xfrm_replay_check);
1440
1441 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1442 {
1443         u32 diff;
1444         u32 seq = ntohl(net_seq);
1445
1446         if (seq > x->replay.seq) {
1447                 diff = seq - x->replay.seq;
1448                 if (diff < x->props.replay_window)
1449                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1450                 else
1451                         x->replay.bitmap = 1;
1452                 x->replay.seq = seq;
1453         } else {
1454                 diff = x->replay.seq - seq;
1455                 x->replay.bitmap |= (1U << diff);
1456         }
1457
1458         if (xfrm_aevent_is_on())
1459                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1460 }
1461 EXPORT_SYMBOL(xfrm_replay_advance);
1462
/* Registered key managers, and the rwlock protecting the list. */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1465
1466 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1467 {
1468         struct xfrm_mgr *km;
1469
1470         read_lock(&xfrm_km_lock);
1471         list_for_each_entry(km, &xfrm_km_list, list)
1472                 if (km->notify_policy)
1473                         km->notify_policy(xp, dir, c);
1474         read_unlock(&xfrm_km_lock);
1475 }
1476
1477 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1478 {
1479         struct xfrm_mgr *km;
1480         read_lock(&xfrm_km_lock);
1481         list_for_each_entry(km, &xfrm_km_list, list)
1482                 if (km->notify)
1483                         km->notify(x, c);
1484         read_unlock(&xfrm_km_lock);
1485 }
1486
1487 EXPORT_SYMBOL(km_policy_notify);
1488 EXPORT_SYMBOL(km_state_notify);
1489
1490 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1491 {
1492         struct km_event c;
1493
1494         c.data.hard = hard;
1495         c.pid = pid;
1496         c.event = XFRM_MSG_EXPIRE;
1497         km_state_notify(x, &c);
1498
1499         if (hard)
1500                 wake_up(&km_waitq);
1501 }
1502
1503 EXPORT_SYMBOL(km_state_expired);
1504 /*
1505  * We send to all registered managers regardless of failure
1506  * We are happy with one success
1507 */
1508 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1509 {
1510         int err = -EINVAL, acqret;
1511         struct xfrm_mgr *km;
1512
1513         read_lock(&xfrm_km_lock);
1514         list_for_each_entry(km, &xfrm_km_list, list) {
1515                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1516                 if (!acqret)
1517                         err = acqret;
1518         }
1519         read_unlock(&xfrm_km_lock);
1520         return err;
1521 }
1522 EXPORT_SYMBOL(km_query);
1523
1524 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1525 {
1526         int err = -EINVAL;
1527         struct xfrm_mgr *km;
1528
1529         read_lock(&xfrm_km_lock);
1530         list_for_each_entry(km, &xfrm_km_list, list) {
1531                 if (km->new_mapping)
1532                         err = km->new_mapping(x, ipaddr, sport);
1533                 if (!err)
1534                         break;
1535         }
1536         read_unlock(&xfrm_km_lock);
1537         return err;
1538 }
1539 EXPORT_SYMBOL(km_new_mapping);
1540
1541 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1542 {
1543         struct km_event c;
1544
1545         c.data.hard = hard;
1546         c.pid = pid;
1547         c.event = XFRM_MSG_POLEXPIRE;
1548         km_policy_notify(pol, dir, &c);
1549
1550         if (hard)
1551                 wake_up(&km_waitq);
1552 }
1553 EXPORT_SYMBOL(km_policy_expired);
1554
1555 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1556                struct xfrm_migrate *m, int num_migrate)
1557 {
1558         int err = -EINVAL;
1559         int ret;
1560         struct xfrm_mgr *km;
1561
1562         read_lock(&xfrm_km_lock);
1563         list_for_each_entry(km, &xfrm_km_list, list) {
1564                 if (km->migrate) {
1565                         ret = km->migrate(sel, dir, type, m, num_migrate);
1566                         if (!ret)
1567                                 err = ret;
1568                 }
1569         }
1570         read_unlock(&xfrm_km_lock);
1571         return err;
1572 }
1573 EXPORT_SYMBOL(km_migrate);
1574
1575 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1576 {
1577         int err = -EINVAL;
1578         int ret;
1579         struct xfrm_mgr *km;
1580
1581         read_lock(&xfrm_km_lock);
1582         list_for_each_entry(km, &xfrm_km_list, list) {
1583                 if (km->report) {
1584                         ret = km->report(proto, sel, addr);
1585                         if (!ret)
1586                                 err = ret;
1587                 }
1588         }
1589         read_unlock(&xfrm_km_lock);
1590         return err;
1591 }
1592 EXPORT_SYMBOL(km_report);
1593
1594 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1595 {
1596         int err;
1597         u8 *data;
1598         struct xfrm_mgr *km;
1599         struct xfrm_policy *pol = NULL;
1600
1601         if (optlen <= 0 || optlen > PAGE_SIZE)
1602                 return -EMSGSIZE;
1603
1604         data = kmalloc(optlen, GFP_KERNEL);
1605         if (!data)
1606                 return -ENOMEM;
1607
1608         err = -EFAULT;
1609         if (copy_from_user(data, optval, optlen))
1610                 goto out;
1611
1612         err = -EINVAL;
1613         read_lock(&xfrm_km_lock);
1614         list_for_each_entry(km, &xfrm_km_list, list) {
1615                 pol = km->compile_policy(sk, optname, data,
1616                                          optlen, &err);
1617                 if (err >= 0)
1618                         break;
1619         }
1620         read_unlock(&xfrm_km_lock);
1621
1622         if (err >= 0) {
1623                 xfrm_sk_policy_insert(sk, err, pol);
1624                 xfrm_pol_put(pol);
1625                 err = 0;
1626         }
1627
1628 out:
1629         kfree(data);
1630         return err;
1631 }
1632 EXPORT_SYMBOL(xfrm_user_policy);
1633
1634 int xfrm_register_km(struct xfrm_mgr *km)
1635 {
1636         write_lock_bh(&xfrm_km_lock);
1637         list_add_tail(&km->list, &xfrm_km_list);
1638         write_unlock_bh(&xfrm_km_lock);
1639         return 0;
1640 }
1641 EXPORT_SYMBOL(xfrm_register_km);
1642
1643 int xfrm_unregister_km(struct xfrm_mgr *km)
1644 {
1645         write_lock_bh(&xfrm_km_lock);
1646         list_del(&km->list);
1647         write_unlock_bh(&xfrm_km_lock);
1648         return 0;
1649 }
1650 EXPORT_SYMBOL(xfrm_unregister_km);
1651
1652 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1653 {
1654         int err = 0;
1655         if (unlikely(afinfo == NULL))
1656                 return -EINVAL;
1657         if (unlikely(afinfo->family >= NPROTO))
1658                 return -EAFNOSUPPORT;
1659         write_lock_bh(&xfrm_state_afinfo_lock);
1660         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1661                 err = -ENOBUFS;
1662         else
1663                 xfrm_state_afinfo[afinfo->family] = afinfo;
1664         write_unlock_bh(&xfrm_state_afinfo_lock);
1665         return err;
1666 }
1667 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1668
1669 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1670 {
1671         int err = 0;
1672         if (unlikely(afinfo == NULL))
1673                 return -EINVAL;
1674         if (unlikely(afinfo->family >= NPROTO))
1675                 return -EAFNOSUPPORT;
1676         write_lock_bh(&xfrm_state_afinfo_lock);
1677         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1678                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1679                         err = -EINVAL;
1680                 else
1681                         xfrm_state_afinfo[afinfo->family] = NULL;
1682         }
1683         write_unlock_bh(&xfrm_state_afinfo_lock);
1684         return err;
1685 }
1686 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1687
1688 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1689 {
1690         struct xfrm_state_afinfo *afinfo;
1691         if (unlikely(family >= NPROTO))
1692                 return NULL;
1693         read_lock(&xfrm_state_afinfo_lock);
1694         afinfo = xfrm_state_afinfo[family];
1695         if (unlikely(!afinfo))
1696                 read_unlock(&xfrm_state_afinfo_lock);
1697         return afinfo;
1698 }
1699
1700 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1701 {
1702         read_unlock(&xfrm_state_afinfo_lock);
1703 }
1704
1705 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1706 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1707
1708 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1709 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1710 {
1711         if (x->tunnel) {
1712                 struct xfrm_state *t = x->tunnel;
1713
1714                 if (atomic_read(&t->tunnel_users) == 2)
1715                         xfrm_state_delete(t);
1716                 atomic_dec(&t->tunnel_users);
1717                 xfrm_state_put(t);
1718                 x->tunnel = NULL;
1719         }
1720 }
1721 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1722
1723 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1724 {
1725         int res;
1726
1727         spin_lock_bh(&x->lock);
1728         if (x->km.state == XFRM_STATE_VALID &&
1729             x->type && x->type->get_mtu)
1730                 res = x->type->get_mtu(x, mtu);
1731         else
1732                 res = mtu - x->props.header_len;
1733         spin_unlock_bh(&x->lock);
1734         return res;
1735 }
1736
1737 int xfrm_init_state(struct xfrm_state *x)
1738 {
1739         struct xfrm_state_afinfo *afinfo;
1740         int family = x->props.family;
1741         int err;
1742
1743         err = -EAFNOSUPPORT;
1744         afinfo = xfrm_state_get_afinfo(family);
1745         if (!afinfo)
1746                 goto error;
1747
1748         err = 0;
1749         if (afinfo->init_flags)
1750                 err = afinfo->init_flags(x);
1751
1752         xfrm_state_put_afinfo(afinfo);
1753
1754         if (err)
1755                 goto error;
1756
1757         err = -EPROTONOSUPPORT;
1758         x->type = xfrm_get_type(x->id.proto, family);
1759         if (x->type == NULL)
1760                 goto error;
1761
1762         err = x->type->init_state(x);
1763         if (err)
1764                 goto error;
1765
1766         x->mode = xfrm_get_mode(x->props.mode, family);
1767         if (x->mode == NULL)
1768                 goto error;
1769
1770         x->km.state = XFRM_STATE_VALID;
1771
1772 error:
1773         return err;
1774 }
1775
1776 EXPORT_SYMBOL(xfrm_init_state);
1777
1778 void __init xfrm_state_init(void)
1779 {
1780         unsigned int sz;
1781
1782         sz = sizeof(struct hlist_head) * 8;
1783
1784         xfrm_state_bydst = xfrm_hash_alloc(sz);
1785         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1786         xfrm_state_byspi = xfrm_hash_alloc(sz);
1787         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1788                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1789         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1790
1791         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1792 }
1793
1794 #ifdef CONFIG_AUDITSYSCALL
1795 static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
1796                                                struct audit_buffer *audit_buf)
1797 {
1798         if (x->security)
1799                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
1800                                  x->security->ctx_alg, x->security->ctx_doi,
1801                                  x->security->ctx_str);
1802
1803         switch(x->props.family) {
1804         case AF_INET:
1805                 audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
1806                                  NIPQUAD(x->props.saddr.a4),
1807                                  NIPQUAD(x->id.daddr.a4));
1808                 break;
1809         case AF_INET6:
1810                 {
1811                         struct in6_addr saddr6, daddr6;
1812
1813                         memcpy(&saddr6, x->props.saddr.a6,
1814                                 sizeof(struct in6_addr));
1815                         memcpy(&daddr6, x->id.daddr.a6,
1816                                 sizeof(struct in6_addr));
1817                         audit_log_format(audit_buf,
1818                                          " src=" NIP6_FMT " dst=" NIP6_FMT,
1819                                          NIP6(saddr6), NIP6(daddr6));
1820                 }
1821                 break;
1822         }
1823 }
1824
1825 void
1826 xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid)
1827 {
1828         struct audit_buffer *audit_buf;
1829         extern int audit_enabled;
1830
1831         if (audit_enabled == 0)
1832                 return;
1833         audit_buf = xfrm_audit_start(sid, auid);
1834         if (audit_buf == NULL)
1835                 return;
1836         audit_log_format(audit_buf, " op=SAD-add res=%u",result);
1837         xfrm_audit_common_stateinfo(x, audit_buf);
1838         audit_log_format(audit_buf, " spi=%lu(0x%lx)",
1839                          (unsigned long)x->id.spi, (unsigned long)x->id.spi);
1840         audit_log_end(audit_buf);
1841 }
1842 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
1843
1844 void
1845 xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid)
1846 {
1847         struct audit_buffer *audit_buf;
1848         extern int audit_enabled;
1849
1850         if (audit_enabled == 0)
1851                 return;
1852         audit_buf = xfrm_audit_start(sid, auid);
1853         if (audit_buf == NULL)
1854                 return;
1855         audit_log_format(audit_buf, " op=SAD-delete res=%u",result);
1856         xfrm_audit_common_stateinfo(x, audit_buf);
1857         audit_log_format(audit_buf, " spi=%lu(0x%lx)",
1858                          (unsigned long)x->id.spi, (unsigned long)x->id.spi);
1859         audit_log_end(audit_buf);
1860 }
1861 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
1862 #endif /* CONFIG_AUDITSYSCALL */