/*
 * Changes:
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 */
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/cache.h>
#include <asm/uaccess.h>
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static DEFINE_SPINLOCK(xfrm_state_lock);
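/* xfrm_state_lock guards the three hash tables below, together with
 * xfrm_state_num and xfrm_state_hmask; every table lookup and update in
 * this file runs under it with bottom halves disabled.
 */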
/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;
static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a4);
        h = (h ^ (h>>16)) & hmask;
        return h;
}

static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a6[2]^addr->a6[3]);
        h = (h ^ (h>>16)) & hmask;
        return h;
}

static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm4_dst_hash(addr, hmask);
}

static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm6_dst_hash(addr, hmask);
}

static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
{
        switch (family) {
        case AF_INET:
                return __xfrm4_src_hash(addr, hmask);
        case AF_INET6:
                return __xfrm6_src_hash(addr, hmask);
        }
        return 0;
}

static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_src_hash(addr, family, xfrm_state_hmask);
}
static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
{
        switch (family) {
        case AF_INET:
                return __xfrm4_dst_hash(addr, hmask);
        case AF_INET6:
                return __xfrm6_dst_hash(addr, hmask);
        }
        return 0;
}

static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
}
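/* Example (hypothetical values): with xfrm_state_hmask == 15, an IPv4
 * daddr of 192.0.2.1 gives h = 0xc0000201, h ^ (h >> 16) = 0xc000c201,
 * and a final bucket index of 0xc000c201 & 15 == 1.
 */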
static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
                                            unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a4^spi^proto);
        h = (h ^ (h>>10) ^ (h>>20)) & hmask;
        return h;
}

static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
                                            unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
        h = (h ^ (h>>10) ^ (h>>20)) & hmask;
        return h;
}

static inline
unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
                         unsigned int hmask)
{
        switch (family) {
        case AF_INET:
                return __xfrm4_spi_hash(addr, spi, proto, hmask);
        case AF_INET6:
                return __xfrm6_spi_hash(addr, spi, proto, hmask);
        }
        return 0;
}

static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
        return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}
static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
{
        struct hlist_head *n;

        if (sz <= PAGE_SIZE)
                n = kmalloc(sz, GFP_KERNEL);
        else if (hashdist)
                n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
        else
                n = (struct hlist_head *)
                        __get_free_pages(GFP_KERNEL, get_order(sz));

        if (n)
                memset(n, 0, sz);

        return n;
}

static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
{
        if (sz <= PAGE_SIZE)
                kfree(n);
        else if (hashdist)
                vfree(n);
        else
                free_pages((unsigned long)n, get_order(sz));
}
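/* Tables small enough for kmalloc() come from the slab; once a resize
 * pushes the size past PAGE_SIZE the memory comes from
 * __get_free_pages(), or from __vmalloc() when "hashdist" requests
 * NUMA-distributed hash tables.  The free side mirrors the same size
 * checks, so alloc and free must stay in sync.
 */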
static void xfrm_hash_transfer(struct hlist_head *list,
                               struct hlist_head *ndsttable,
                               struct hlist_head *nsrctable,
                               struct hlist_head *nspitable,
                               unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_state *x;

        hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
                unsigned int h;

                h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);

                h = __xfrm_src_hash(&x->props.saddr, x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);

                h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                    x->props.family, nhashmask);
                hlist_add_head(&x->byspi, nspitable+h);
        }
}
static unsigned long xfrm_hash_new_size(void)
{
        return ((xfrm_state_hmask + 1) << 1) *
                sizeof(struct hlist_head);
}

static DEFINE_MUTEX(hash_resize_mutex);
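/* Double the bucket count, rehash every state into the new tables under
 * xfrm_state_lock, publish the new pointers, then free the old tables
 * after dropping the lock.  hash_resize_mutex serializes concurrent
 * resize requests against each other.
 */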
static void xfrm_hash_resize(void *__unused)
{
        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
        unsigned long nsize, osize;
        unsigned int nhashmask, ohashmask;
        int i;

        mutex_lock(&hash_resize_mutex);

        nsize = xfrm_hash_new_size();
        ndst = xfrm_state_hash_alloc(nsize);
        if (!ndst)
                goto out_unlock;
        nsrc = xfrm_state_hash_alloc(nsize);
        if (!nsrc) {
                xfrm_state_hash_free(ndst, nsize);
                goto out_unlock;
        }
        nspi = xfrm_state_hash_alloc(nsize);
        if (!nspi) {
                xfrm_state_hash_free(ndst, nsize);
                xfrm_state_hash_free(nsrc, nsize);
                goto out_unlock;
        }

        spin_lock_bh(&xfrm_state_lock);

        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
        for (i = xfrm_state_hmask; i >= 0; i--)
                xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
                                   nhashmask);

        odst = xfrm_state_bydst;
        osrc = xfrm_state_bysrc;
        ospi = xfrm_state_byspi;
        ohashmask = xfrm_state_hmask;

        xfrm_state_bydst = ndst;
        xfrm_state_bysrc = nsrc;
        xfrm_state_byspi = nspi;
        xfrm_state_hmask = nhashmask;

        spin_unlock_bh(&xfrm_state_lock);

        osize = (ohashmask + 1) * sizeof(struct hlist_head);
        xfrm_state_hash_free(odst, osize);
        xfrm_state_hash_free(osrc, osize);
        xfrm_state_hash_free(ospi, osize);

out_unlock:
        mutex_unlock(&hash_resize_mutex);
}
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
        if (del_timer(&x->timer))
                BUG();
        if (del_timer(&x->rtimer))
                BUG();
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
        kfree(x->encap);
        kfree(x->coaddr);
        if (x->mode)
                xfrm_put_mode(x->mode);
        if (x->type) {
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
        security_xfrm_state_free(x);
        kfree(x);
}
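/* Freeing is deferred to process context: __xfrm_state_destroy() chains
 * dead states onto xfrm_state_gc_list, and the work function below
 * detaches the whole list under xfrm_state_gc_lock, then destroys the
 * entries without the lock held.
 */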
static void xfrm_state_gc_task(void *data)
{
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        if (xfrm_state_gc_flush_bundles) {
                xfrm_state_gc_flush_bundles = 0;
                xfrm_flush_bundles();
        }

        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
                xfrm_state_gc_destroy(x);

        wake_up(&km_waitq);
}
static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}
static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;
        int warn = 0;

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->km.dying)
                goto resched;
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0, 0);
resched:
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
                xfrm_state_hold(x);
        goto out;

expired:
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }
        if (!__xfrm_state_delete(x) && x->id.spi)
                km_state_expired(x, 1, 0);

out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);
}
static void xfrm_replay_timer_handler(unsigned long data);

struct xfrm_state *xfrm_state_alloc(void)
{
        struct xfrm_state *x;

        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

        if (x) {
                atomic_set(&x->refcnt, 1);
                atomic_set(&x->tunnel_users, 0);
                INIT_HLIST_NODE(&x->bydst);
                INIT_HLIST_NODE(&x->bysrc);
                INIT_HLIST_NODE(&x->byspi);
                init_timer(&x->timer);
                x->timer.function = xfrm_timer_handler;
                x->timer.data     = (unsigned long)x;
                init_timer(&x->rtimer);
                x->rtimer.function = xfrm_replay_timer_handler;
                x->rtimer.data     = (unsigned long)x;
                x->curlft.add_time = (unsigned long)xtime.tv_sec;
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
                x->lft.hard_packet_limit = XFRM_INF;
                x->replay_maxage = 0;
                x->replay_maxdiff = 0;
                spin_lock_init(&x->lock);
        }
        return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
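/* Typical key-manager call sequence (sketch only, attribute setup and
 * error handling omitted):
 *
 *      struct xfrm_state *x = xfrm_state_alloc();
 *
 *      ...fill in x->id, x->props, x->sel and the lifetimes...
 *      if (xfrm_init_state(x) == 0)
 *              xfrm_state_add(x);
 *
 * The reference taken here is the one finally dropped by
 * __xfrm_state_delete().
 */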
void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        hlist_add_head(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
int __xfrm_state_delete(struct xfrm_state *x)
{
        int err = -ESRCH;

        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                hlist_del(&x->bydst);
                __xfrm_state_put(x);
                hlist_del(&x->bysrc);
                __xfrm_state_put(x);
                if (x->id.spi) {
                        hlist_del(&x->byspi);
                        __xfrm_state_put(x);
                }
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);
                if (del_timer(&x->timer))
                        __xfrm_state_put(x);
                if (del_timer(&x->rtimer))
                        __xfrm_state_put(x);

                /* The number two in this test is the reference
                 * mentioned in the comment below plus the reference
                 * our caller holds.  A larger value means that
                 * there are DSTs attached to this xfrm_state.
                 */
                if (atomic_read(&x->refcnt) > 2) {
                        xfrm_state_gc_flush_bundles = 1;
                        schedule_work(&xfrm_state_gc_work);
                }

                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
                 * is what we are dropping here.
                 */
                __xfrm_state_put(x);
                err = 0;
        }

        return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
int xfrm_state_delete(struct xfrm_state *x)
{
        int err;

        spin_lock_bh(&x->lock);
        err = __xfrm_state_delete(x);
        spin_unlock_bh(&x->lock);

        return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
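/* Delete all non-kernel states matching @proto.  The table lock is
 * dropped around each xfrm_state_delete() call, so after re-taking it
 * the scan restarts from the head of the current chain.
 */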
void xfrm_state_flush(u8 proto)
{
        int i;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;
restart:
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_state_kern(x) &&
                            xfrm_id_proto_match(x->id.proto, proto)) {
                                xfrm_state_hold(x);
                                spin_unlock_bh(&xfrm_state_lock);

                                xfrm_state_delete(x);
                                xfrm_state_put(x);

                                spin_lock_bh(&xfrm_state_lock);
                                goto restart;
                        }
                }
        }
        spin_unlock_bh(&xfrm_state_lock);
        wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
                  struct xfrm_tmpl *tmpl,
                  xfrm_address_t *daddr, xfrm_address_t *saddr,
                  unsigned short family)
{
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -1;
        afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
        xfrm_state_put_afinfo(afinfo);
        return 0;
}
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_src_hash(saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4    != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6) ||
                            !ipv6_addr_equal((struct in6_addr *)saddr,
                                             (struct in6_addr *)
                                             x->props.saddr.a6))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
        if (use_spi)
                return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
                                           x->id.proto, family);
        else
                return __xfrm_state_lookup_byaddr(&x->id.daddr,
                                                  &x->props.saddr,
                                                  x->id.proto, family);
}
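/* Find a state for an outbound flow.  Among VALID states matching the
 * template the best candidate is one that is not dying, with ties
 * broken by the most recent add_time.  If nothing matches and no
 * acquire is already pending, an XFRM_STATE_ACQ placeholder is
 * installed and the key managers are asked via km_query() to negotiate
 * a real SA, so only one acquire per target is outstanding.
 */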
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;

        spin_lock_bh(&xfrm_state_lock);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !security_xfrm_state_pol_flow_match(x, pol, fl))
                                        continue;
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    security_xfrm_state_pol_flow_match(x, pol, fl))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        if (!x && !error && !acquire_in_progress) {
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
                                              tmpl->id.proto, family)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
                if (error) {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                        xfrm_state_hold(x);
                        h = xfrm_src_hash(saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                                xfrm_state_hold(x);
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x)
                xfrm_state_hold(x);
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
static void __xfrm_state_insert(struct xfrm_state *x)
{
        unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);

        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
        xfrm_state_hold(x);

        h = xfrm_src_hash(&x->props.saddr, x->props.family);
        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
        xfrm_state_hold(x);

        if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                xfrm_state_hold(x);
        }

        if (!mod_timer(&x->timer, jiffies + HZ))
                xfrm_state_hold(x);

        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                xfrm_state_hold(x);

        wake_up(&km_waitq);

        xfrm_state_num++;

        if (x->bydst.next != NULL &&
            (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
            xfrm_state_num > xfrm_state_hmask)
                schedule_work(&xfrm_hash_work);
}
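/* The resize check above grows the tables once the number of states
 * exceeds the number of buckets, but only when this insert actually
 * collided with an existing chain entry (x->bydst.next != NULL) and the
 * table is still below xfrm_state_hashmax.
 */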
void xfrm_state_insert(struct xfrm_state *x)
{
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);

        xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x;

        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
                    x->km.state     != XFRM_STATE_ACQ ||
                    x->id.spi       != 0)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4    != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
                                             (struct in6_addr *)daddr) ||
                            !ipv6_addr_equal((struct in6_addr *)
                                             x->props.saddr.a6,
                                             (struct in6_addr *)saddr))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        if (!create)
                return NULL;

        x = xfrm_state_alloc();
        if (likely(x)) {
                switch (family) {
                case AF_INET:
                        x->sel.daddr.a4 = daddr->a4;
                        x->sel.saddr.a4 = saddr->a4;
                        x->sel.prefixlen_d = 32;
                        x->sel.prefixlen_s = 32;
                        x->props.saddr.a4 = saddr->a4;
                        x->id.daddr.a4 = daddr->a4;
                        break;

                case AF_INET6:
                        ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
                                       (struct in6_addr *)daddr);
                        ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
                                       (struct in6_addr *)saddr);
                        x->sel.prefixlen_d = 128;
                        x->sel.prefixlen_s = 128;
                        ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
                                       (struct in6_addr *)saddr);
                        ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
                                       (struct in6_addr *)daddr);
                        break;
                }

                x->km.state = XFRM_STATE_ACQ;
                x->id.proto = proto;
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
                x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                xfrm_state_hold(x);
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
                xfrm_state_hold(x);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                h = xfrm_src_hash(saddr, family);
                xfrm_state_hold(x);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
        }

        return x;
}
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);

int xfrm_state_add(struct xfrm_state *x)
{
        struct xfrm_state *x1;
        int family;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

        family = x->props.family;

        spin_lock_bh(&xfrm_state_lock);

        x1 = __xfrm_state_locate(x, use_spi, family);
        if (x1) {
                xfrm_state_put(x1);
                x1 = NULL;
                err = -EEXIST;
                goto out;
        }

        if (use_spi && x->km.seq) {
                x1 = __xfrm_find_acq_byseq(x->km.seq);
                if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
                        xfrm_state_put(x1);
                        x1 = NULL;
                }
        }

        if (use_spi && !x1)
                x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
                                     x->id.proto,
                                     &x->id.daddr, &x->props.saddr, 0);

        __xfrm_state_insert(x);
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);

        if (!err)
                xfrm_flush_all_bundles();

        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
        }

        return err;
}
EXPORT_SYMBOL(xfrm_state_add);
int xfrm_state_update(struct xfrm_state *x)
{
        struct xfrm_state *x1;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

        spin_lock_bh(&xfrm_state_lock);
        x1 = __xfrm_state_locate(x, use_spi, x->props.family);

        err = -ESRCH;
        if (!x1)
                goto out;

        if (xfrm_state_kern(x1)) {
                xfrm_state_put(x1);
                err = -EEXIST;
                goto out;
        }

        if (x1->km.state == XFRM_STATE_ACQ) {
                __xfrm_state_insert(x);
                x = NULL;
        }
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);

        if (err)
                return err;

        if (!x) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
                return 0;
        }

        err = -EINVAL;
        spin_lock_bh(&x1->lock);
        if (likely(x1->km.state == XFRM_STATE_VALID)) {
                if (x->encap && x1->encap)
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                if (x->coaddr && x1->coaddr) {
                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
                }
                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;

                if (!mod_timer(&x1->timer, jiffies + HZ))
                        xfrm_state_hold(x1);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);

                err = 0;
        }
        spin_unlock_bh(&x1->lock);

        xfrm_state_put(x1);

        return err;
}
EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
        if (!x->curlft.use_time)
                x->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (x->km.state != XFRM_STATE_VALID)
                return -EINVAL;

        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
                x->km.state = XFRM_STATE_EXPIRED;
                if (!mod_timer(&x->timer, jiffies))
                        xfrm_state_hold(x);
                return -EINVAL;
        }

        if (!x->km.dying &&
            (x->curlft.bytes >= x->lft.soft_byte_limit ||
             x->curlft.packets >= x->lft.soft_packet_limit)) {
                x->km.dying = 1;
                km_state_expired(x, 0, 0);
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
        int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
                - skb_headroom(skb);

        if (nhead > 0)
                return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

        /* Check tail too... */
        return 0;
}

int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int err = xfrm_state_check_expire(x);
        if (err)
                goto err;
        err = xfrm_state_check_space(x, skb);
err:
        return err;
}
EXPORT_SYMBOL(xfrm_state_check);
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
                  unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup(daddr, spi, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);

struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
                         u8 proto, unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);

struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
              xfrm_address_t *daddr, xfrm_address_t *saddr,
              int create, unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
        spin_unlock_bh(&xfrm_state_lock);

        return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
#ifdef CONFIG_XFRM_SUB_POLICY
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
               unsigned short family)
{
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);
        if (afinfo->tmpl_sort)
                err = afinfo->tmpl_sort(dst, src, n);
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);

int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
                unsigned short family)
{
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);
        if (afinfo->state_sort)
                err = afinfo->state_sort(dst, src, n);
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
#endif
/* Silly enough, but I'm lazy to build resolution list */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
        int i;

        for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;

                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (x->km.seq == seq &&
                            x->km.state == XFRM_STATE_ACQ) {
                                xfrm_state_hold(x);
                                return x;
                        }
                }
        }
        return NULL;
}

struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_find_acq_byseq(seq);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
u32 xfrm_get_acqseq(void)
{
        u32 res;
        static u32 acqseq;
        static DEFINE_SPINLOCK(acqseq_lock);

        spin_lock_bh(&acqseq_lock);
        res = (++acqseq ? : ++acqseq);
        spin_unlock_bh(&acqseq_lock);
        return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);
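/* Choose an SPI for @x.  minspi == maxspi means the caller wants that
 * exact value; otherwise random values in [minspi, maxspi] are probed
 * until an unused one is found.  On success the state is hashed into
 * the byspi table.
 */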
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
        unsigned int h;
        struct xfrm_state *x0;

        if (x->id.spi)
                return;

        if (minspi == maxspi) {
                x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
                if (x0) {
                        xfrm_state_put(x0);
                        return;
                }
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
                minspi = ntohl(minspi);
                maxspi = ntohl(maxspi);
                for (h=0; h<maxspi-minspi+1; h++) {
                        spi = minspi + net_random()%(maxspi-minspi+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
                                break;
                        }
                        xfrm_state_put(x0);
                }
        }
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                xfrm_state_hold(x);
                spin_unlock_bh(&xfrm_state_lock);
                wake_up(&km_waitq);
        }
}
EXPORT_SYMBOL(xfrm_alloc_spi);
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
                    void *data)
{
        int i;
        struct xfrm_state *x;
        struct hlist_node *entry;
        int count = 0;
        int err = 0;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (xfrm_id_proto_match(x->id.proto, proto))
                                count++;
                }
        }
        if (count == 0) {
                err = -ENOENT;
                goto out;
        }

        for (i = 0; i <= xfrm_state_hmask; i++) {
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_id_proto_match(x->id.proto, proto))
                                continue;
                        err = func(x, --count, data);
                        if (err)
                                goto out;
                }
        }
out:
        spin_unlock_bh(&xfrm_state_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
        struct km_event c;
        /* we send notify messages in case
         *  1. we updated one of the sequence numbers, and the seqno difference
         *     is at least x->replay_maxdiff, in this case we also update the
         *     timeout of our timer function
         *  2. if x->replay_maxage has elapsed since last update,
         *     and there were changes
         *
         *  The state structure must be locked!
         */

        switch (event) {
        case XFRM_REPLAY_UPDATE:
                if (x->replay_maxdiff &&
                    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
                    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
                        if (x->xflags & XFRM_TIME_DEFER)
                                event = XFRM_REPLAY_TIMEOUT;
                        else
                                return;
                }

                break;

        case XFRM_REPLAY_TIMEOUT:
                if ((x->replay.seq == x->preplay.seq) &&
                    (x->replay.bitmap == x->preplay.bitmap) &&
                    (x->replay.oseq == x->preplay.oseq)) {
                        x->xflags |= XFRM_TIME_DEFER;
                        return;
                }

                break;
        }

        memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
        c.event = XFRM_MSG_NEWAE;
        c.data.aevent = event;
        km_state_notify(x, &c);

        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
                xfrm_state_hold(x);
                x->xflags &= ~XFRM_TIME_DEFER;
        }
}
EXPORT_SYMBOL(xfrm_replay_notify);
static void xfrm_replay_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;

        spin_lock(&x->lock);

        if (x->km.state == XFRM_STATE_VALID) {
                if (xfrm_aevent_is_on())
                        xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
                else
                        x->xflags |= XFRM_TIME_DEFER;
        }

        spin_unlock(&x->lock);
        xfrm_state_put(x);
}
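/* The replay window is a bitmap sliding with the highest sequence
 * number seen.  Example (hypothetical, replay_window == 32): after
 * seq 100 and then 101 arrive, replay.seq == 101 and bits 0 and 1 are
 * set.  A late seq 99 has diff == 2 and sets bit 2; a second copy of
 * seq 99 finds bit 2 already set and is rejected, as is any seq <= 69
 * (diff >= 32).
 */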
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
        u32 diff;

        seq = ntohl(seq);

        if (unlikely(seq == 0))
                return -EINVAL;

        if (likely(seq > x->replay.seq))
                return 0;

        diff = x->replay.seq - seq;
        if (diff >= x->props.replay_window) {
                x->stats.replay_window++;
                return -EINVAL;
        }

        if (x->replay.bitmap & (1U << diff)) {
                x->stats.replay++;
                return -EINVAL;
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
        u32 diff;

        seq = ntohl(seq);

        if (seq > x->replay.seq) {
                diff = seq - x->replay.seq;
                if (diff < x->props.replay_window)
                        x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
                else
                        x->replay.bitmap = 1;
                x->replay.seq = seq;
        } else {
                diff = x->replay.seq - seq;
                x->replay.bitmap |= (1U << diff);
        }

        if (xfrm_aevent_is_on())
                xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);

void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list)
                if (km->notify_policy)
                        km->notify_policy(xp, dir, c);
        read_unlock(&xfrm_km_lock);
}

void km_state_notify(struct xfrm_state *x, struct km_event *c)
{
        struct xfrm_mgr *km;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list)
                if (km->notify)
                        km->notify(x, c);
        read_unlock(&xfrm_km_lock);
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
{
        struct km_event c;

        c.data.hard = hard;
        c.pid = pid;
        c.event = XFRM_MSG_EXPIRE;
        km_state_notify(x, &c);

        if (hard)
                wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure.
 * We are happy with one success.
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
        int err = -EINVAL, acqret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
                if (!acqret)
                        err = acqret;
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_query);
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
        int err = -EINVAL;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->new_mapping)
                        err = km->new_mapping(x, ipaddr, sport);
                if (!err)
                        break;
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_new_mapping);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
        struct km_event c;

        c.data.hard = hard;
        c.pid = pid;
        c.event = XFRM_MSG_POLEXPIRE;
        km_policy_notify(pol, dir, &c);

        if (hard)
                wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);
int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
        int err = -EINVAL;
        int ret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->report) {
                        ret = km->report(proto, sel, addr);
                        if (!ret)
                                err = ret;
                }
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_report);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
        int err;
        u8 *data;
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;

        if (optlen <= 0 || optlen > PAGE_SIZE)
                return -EMSGSIZE;

        data = kmalloc(optlen, GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        err = -EFAULT;
        if (copy_from_user(data, optval, optlen))
                goto out;

        err = -EINVAL;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                pol = km->compile_policy(sk, optname, data,
                                         optlen, &err);
                if (err >= 0)
                        break;
        }
        read_unlock(&xfrm_km_lock);

        if (err >= 0) {
                xfrm_sk_policy_insert(sk, err, pol);
                xfrm_pol_put(pol);
                err = 0;
        }

out:
        kfree(data);
        return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
int xfrm_register_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_add_tail(&km->list, &xfrm_km_list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_del(&km->list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else
                xfrm_state_afinfo[afinfo->family] = afinfo;
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else
                        xfrm_state_afinfo[afinfo->family] = NULL;
        }
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
        struct xfrm_state_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_state_afinfo_lock);
        afinfo = xfrm_state_afinfo[family];
        if (unlikely(!afinfo))
                read_unlock(&xfrm_state_afinfo_lock);
        return afinfo;
}

static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
        read_unlock(&xfrm_state_afinfo_lock);
}
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
        if (x->tunnel) {
                struct xfrm_state *t = x->tunnel;

                if (atomic_read(&t->tunnel_users) == 2)
                        xfrm_state_delete(t);
                atomic_dec(&t->tunnel_users);
                xfrm_state_put(t);
                x->tunnel = NULL;
        }
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
        int res = mtu;

        res -= x->props.header_len;

        for (;;) {
                int m = res;

                if (m < 68)
                        return 68;

                spin_lock_bh(&x->lock);
                if (x->km.state == XFRM_STATE_VALID &&
                    x->type && x->type->get_max_size)
                        m = x->type->get_max_size(x, m);
                else
                        m += x->props.header_len;
                spin_unlock_bh(&x->lock);

                if (m <= mtu)
                        break;
                res -= (m - mtu);
        }

        return res;
}
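/* Example (hypothetical numbers): with props.header_len == 20 and
 * mtu == 1500 the first guess is m == 1480.  If x->type->get_max_size()
 * rounds that up to a 1512-byte packet, the loop shrinks res by 12 and
 * retries until the padded packet size fits within mtu.
 */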
int xfrm_init_state(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        int family = x->props.family;
        int err;

        err = -EAFNOSUPPORT;
        afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                goto error;

        err = 0;
        if (afinfo->init_flags)
                err = afinfo->init_flags(x);

        xfrm_state_put_afinfo(afinfo);

        if (err)
                goto error;

        err = -EPROTONOSUPPORT;
        x->type = xfrm_get_type(x->id.proto, family);
        if (x->type == NULL)
                goto error;

        err = x->type->init_state(x);
        if (err)
                goto error;

        x->mode = xfrm_get_mode(x->props.mode, family);
        if (x->mode == NULL)
                goto error;

        x->km.state = XFRM_STATE_VALID;

error:
        return err;
}
EXPORT_SYMBOL(xfrm_init_state);
void __init xfrm_state_init(void)
{
        unsigned int sz;

        sz = sizeof(struct hlist_head) * 8;

        xfrm_state_bydst = xfrm_state_hash_alloc(sz);
        xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
        xfrm_state_byspi = xfrm_state_hash_alloc(sz);
        if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
                panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

        INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}