git.karo-electronics.de Git - mv-sheeva.git/blobdiff - net/xfrm/xfrm_state.c
audit: Add auditing to ipsec
[mv-sheeva.git] / net / xfrm / xfrm_state.c
index 445263c54c94e4405305dcf3ff327b2ff85604d0..d5d3a6f1f609d8595f5af7e9d18d4119b93f0dd3 100644 (file)
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
-#include <linux/vmalloc.h>
 #include <linux/cache.h>
 #include <asm/uaccess.h>
+#include <linux/audit.h>
+
+#include "xfrm_hash.h"
 
 struct sock *xfrm_nl;
 EXPORT_SYMBOL(xfrm_nl);
@@ -35,7 +36,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
-   2. Hash table by daddr to find what SAs exist for given
+   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
       destination/tunnel endpoint. (output)
  */
 
@@ -53,128 +54,27 @@ static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
+static unsigned int xfrm_state_genid;
 
-static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
-{
-       unsigned int h;
-       h = ntohl(addr->a4);
-       h = (h ^ (h>>16)) & hmask;
-       return h;
-}
-
-static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
-{
-       unsigned int h;
-       h = ntohl(addr->a6[2]^addr->a6[3]);
-       h = (h ^ (h>>16)) & hmask;
-       return h;
-}
-
-static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
-{
-       return __xfrm4_dst_hash(addr, hmask);
-}
-
-static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
+                                        xfrm_address_t *saddr,
+                                        u32 reqid,
+                                        unsigned short family)
 {
-       return __xfrm6_dst_hash(addr, hmask);
+       return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
 }
 
-static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,  unsigned int hmask)
+static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
+                                        xfrm_address_t *saddr,
+                                        unsigned short family)
 {
-       switch (family) {
-       case AF_INET:
-               return __xfrm4_src_hash(addr, hmask);
-       case AF_INET6:
-               return __xfrm6_src_hash(addr, hmask);
-       }
-       return 0;
-}
-
-static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
-{
-       return __xfrm_src_hash(addr, family, xfrm_state_hmask);
-}
-
-static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
-{
-       switch (family) {
-       case AF_INET:
-               return __xfrm4_dst_hash(addr, hmask);
-       case AF_INET6:
-               return __xfrm6_dst_hash(addr, hmask);
-       }
-       return 0;
-}
-
-static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
-{
-       return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
-}
-
-static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
-                                       unsigned int hmask)
-{
-       unsigned int h;
-       h = ntohl(addr->a4^spi^proto);
-       h = (h ^ (h>>10) ^ (h>>20)) & hmask;
-       return h;
-}
-
-static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
-                                           unsigned int hmask)
-{
-       unsigned int h;
-       h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
-       h = (h ^ (h>>10) ^ (h>>20)) & hmask;
-       return h;
-}
-
-static inline
-unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
-                        unsigned int hmask)
-{
-       switch (family) {
-       case AF_INET:
-               return __xfrm4_spi_hash(addr, spi, proto, hmask);
-       case AF_INET6:
-               return __xfrm6_spi_hash(addr, spi, proto, hmask);
-       }
-       return 0;       /*XXX*/
+       return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
 }
 
 static inline unsigned int
-xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
-{
-       return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
-}
-
-static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
-{
-       struct hlist_head *n;
-
-       if (sz <= PAGE_SIZE)
-               n = kmalloc(sz, GFP_KERNEL);
-       else if (hashdist)
-               n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
-       else
-               n = (struct hlist_head *)
-                       __get_free_pages(GFP_KERNEL, get_order(sz));
-
-       if (n)
-               memset(n, 0, sz);
-
-       return n;
-}
-
-static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
+xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
 {
-       if (sz <= PAGE_SIZE)
-               kfree(n);
-       else if (hashdist)
-               vfree(n);
-       else
-               free_pages((unsigned long)n, get_order(sz));
+       return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
 }
 
 static void xfrm_hash_transfer(struct hlist_head *list,
@@ -189,16 +89,22 @@ static void xfrm_hash_transfer(struct hlist_head *list,
        hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
                unsigned int h;
 
-               h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
+               h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+                                   x->props.reqid, x->props.family,
+                                   nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);
 
-               h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+               h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
+                                   x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);
 
-               h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
-                                   x->props.family, nhashmask);
-               hlist_add_head(&x->byspi, nspitable+h);
+               if (x->id.spi) {
+                       h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
+                                           x->id.proto, x->props.family,
+                                           nhashmask);
+                       hlist_add_head(&x->byspi, nspitable+h);
+               }
        }
 }
 
@@ -210,7 +116,7 @@ static unsigned long xfrm_hash_new_size(void)
 
 static DEFINE_MUTEX(hash_resize_mutex);
 
-static void xfrm_hash_resize(void *__unused)
+static void xfrm_hash_resize(struct work_struct *__unused)
 {
        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
        unsigned long nsize, osize;
@@ -220,18 +126,18 @@ static void xfrm_hash_resize(void *__unused)
        mutex_lock(&hash_resize_mutex);
 
        nsize = xfrm_hash_new_size();
-       ndst = xfrm_state_hash_alloc(nsize);
+       ndst = xfrm_hash_alloc(nsize);
        if (!ndst)
                goto out_unlock;
-       nsrc = xfrm_state_hash_alloc(nsize);
+       nsrc = xfrm_hash_alloc(nsize);
        if (!nsrc) {
-               xfrm_state_hash_free(ndst, nsize);
+               xfrm_hash_free(ndst, nsize);
                goto out_unlock;
        }
-       nspi = xfrm_state_hash_alloc(nsize);
+       nspi = xfrm_hash_alloc(nsize);
        if (!nspi) {
-               xfrm_state_hash_free(ndst, nsize);
-               xfrm_state_hash_free(nsrc, nsize);
+               xfrm_hash_free(ndst, nsize);
+               xfrm_hash_free(nsrc, nsize);
                goto out_unlock;
        }
 
@@ -255,15 +161,15 @@ static void xfrm_hash_resize(void *__unused)
        spin_unlock_bh(&xfrm_state_lock);
 
        osize = (ohashmask + 1) * sizeof(struct hlist_head);
-       xfrm_state_hash_free(odst, osize);
-       xfrm_state_hash_free(osrc, osize);
-       xfrm_state_hash_free(ospi, osize);
+       xfrm_hash_free(odst, osize);
+       xfrm_hash_free(osrc, osize);
+       xfrm_hash_free(ospi, osize);
 
 out_unlock:
        mutex_unlock(&hash_resize_mutex);
 }
 
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
@@ -275,8 +181,6 @@ static struct work_struct xfrm_state_gc_work;
 static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
-static int xfrm_state_gc_flush_bundles;
-
 int __xfrm_state_delete(struct xfrm_state *x);
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -287,10 +191,8 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
-       if (del_timer(&x->timer))
-               BUG();
-       if (del_timer(&x->rtimer))
-               BUG();
+       del_timer_sync(&x->timer);
+       del_timer_sync(&x->rtimer);
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
@@ -306,17 +208,12 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
        kfree(x);
 }
 
-static void xfrm_state_gc_task(void *data)
+static void xfrm_state_gc_task(struct work_struct *data)
 {
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;
 
-       if (xfrm_state_gc_flush_bundles) {
-               xfrm_state_gc_flush_bundles = 0;
-               xfrm_flush_bundles();
-       }
-
        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
@@ -342,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data)
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;
        int warn = 0;
+       int err = 0;
 
        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
@@ -387,9 +285,9 @@ static void xfrm_timer_handler(unsigned long data)
        if (warn)
                km_state_expired(x, 0, 0);
 resched:
-       if (next != LONG_MAX &&
-           !mod_timer(&x->timer, jiffies + make_jiffies(next)))
-               xfrm_state_hold(x);
+       if (next != LONG_MAX)
+               mod_timer(&x->timer, jiffies + make_jiffies(next));
+
        goto out;
 
 expired:
@@ -399,12 +297,16 @@ expired:
                next = 2;
                goto resched;
        }
-       if (!__xfrm_state_delete(x) && x->id.spi)
+
+       err = __xfrm_state_delete(x);
+       if (!err && x->id.spi)
                km_state_expired(x, 1, 0);
 
+       xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+                      AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
 out:
        spin_unlock(&x->lock);
-       xfrm_state_put(x);
 }
 
 static void xfrm_replay_timer_handler(unsigned long data);
@@ -459,29 +361,11 @@ int __xfrm_state_delete(struct xfrm_state *x)
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                hlist_del(&x->bydst);
-               __xfrm_state_put(x);
                hlist_del(&x->bysrc);
-               __xfrm_state_put(x);
-               if (x->id.spi) {
+               if (x->id.spi)
                        hlist_del(&x->byspi);
-                       __xfrm_state_put(x);
-               }
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);
-               if (del_timer(&x->timer))
-                       __xfrm_state_put(x);
-               if (del_timer(&x->rtimer))
-                       __xfrm_state_put(x);
-
-               /* The number two in this test is the reference
-                * mentioned in the comment below plus the reference
-                * our caller holds.  A larger value means that
-                * there are DSTs attached to this xfrm_state.
-                */
-               if (atomic_read(&x->refcnt) > 2) {
-                       xfrm_state_gc_flush_bundles = 1;
-                       schedule_work(&xfrm_state_gc_work);
-               }
 
                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
@@ -507,12 +391,13 @@ int xfrm_state_delete(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete);
 
-void xfrm_state_flush(u8 proto)
+void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
 {
        int i;
+       int err = 0;
 
        spin_lock_bh(&xfrm_state_lock);
-       for (i = 0; i < xfrm_state_hmask; i++) {
+       for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;
 restart:
@@ -523,6 +408,11 @@ restart:
                                spin_unlock_bh(&xfrm_state_lock);
 
                                xfrm_state_delete(x);
+                               err = xfrm_state_delete(x);
+                               xfrm_audit_log(audit_info->loginuid,
+                                              audit_info->secid,
+                                              AUDIT_MAC_IPSEC_DELSA,
+                                              err ? 0 : 1, NULL, x);
                                xfrm_state_put(x);
 
                                spin_lock_bh(&xfrm_state_lock);
@@ -549,7 +439,7 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
        return 0;
 }
 
-static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
 {
        unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
        struct xfrm_state *x;
@@ -583,7 +473,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8
 
 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
 {
-       unsigned int h = xfrm_src_hash(saddr, family);
+       unsigned int h = xfrm_src_hash(daddr, saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;
 
@@ -628,13 +518,21 @@ __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
                                                  x->id.proto, family);
 }
 
+static void xfrm_hash_grow_check(int have_hash_collision)
+{
+       if (have_hash_collision &&
+           (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
+           xfrm_state_num > xfrm_state_hmask)
+               schedule_work(&xfrm_hash_work);
+}
+
 struct xfrm_state *
 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
 {
-       unsigned int h = xfrm_dst_hash(daddr, family);
+       unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
@@ -712,19 +610,17 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-                       xfrm_state_hold(x);
-                       h = xfrm_src_hash(saddr, family);
+                       h = xfrm_src_hash(daddr, saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
-                       xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-                               xfrm_state_hold(x);
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-                       xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
+                       xfrm_state_num++;
+                       xfrm_hash_grow_check(x->bydst.next != NULL);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
@@ -743,55 +639,67 @@ out:
 
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
-       unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+       unsigned int h;
 
-       hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-       xfrm_state_hold(x);
+       x->genid = ++xfrm_state_genid;
 
-       h = xfrm_src_hash(&x->props.saddr, x->props.family);
+       h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+                         x->props.reqid, x->props.family);
+       hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 
+       h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
-       xfrm_state_hold(x);
 
-       if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
+       if (x->id.spi) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
 
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-               xfrm_state_hold(x);
        }
 
-       if (!mod_timer(&x->timer, jiffies + HZ))
-               xfrm_state_hold(x);
-
-       if (x->replay_maxage &&
-           !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-               xfrm_state_hold(x);
+       mod_timer(&x->timer, jiffies + HZ);
+       if (x->replay_maxage)
+               mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
        wake_up(&km_waitq);
 
        xfrm_state_num++;
 
-       if (x->bydst.next != NULL &&
-           (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
-           xfrm_state_num > xfrm_state_hmask)
-               schedule_work(&xfrm_hash_work);
+       xfrm_hash_grow_check(x->bydst.next != NULL);
+}
+
+/* xfrm_state_lock is held */
+static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+{
+       unsigned short family = xnew->props.family;
+       u32 reqid = xnew->props.reqid;
+       struct xfrm_state *x;
+       struct hlist_node *entry;
+       unsigned int h;
+
+       h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
+       hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+               if (x->props.family     == family &&
+                   x->props.reqid      == reqid &&
+                   !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
+                   !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+                       x->genid = xfrm_state_genid;
+       }
 }
 
 void xfrm_state_insert(struct xfrm_state *x)
 {
        spin_lock_bh(&xfrm_state_lock);
+       __xfrm_state_bump_genids(x);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);
-
-       xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
 /* xfrm_state_lock is held */
 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
 {
-       unsigned int h = xfrm_dst_hash(daddr, family);
+       unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x;
 
@@ -861,12 +769,14 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
                xfrm_state_hold(x);
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
-               xfrm_state_hold(x);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-               h = xfrm_src_hash(saddr, family);
-               xfrm_state_hold(x);
+               h = xfrm_src_hash(daddr, saddr, family);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
+
+               xfrm_state_num++;
+
+               xfrm_hash_grow_check(x->bydst.next != NULL);
        }
 
        return x;
@@ -906,15 +816,13 @@ int xfrm_state_add(struct xfrm_state *x)
                                     x->id.proto,
                                     &x->id.daddr, &x->props.saddr, 0);
 
+       __xfrm_state_bump_genids(x);
        __xfrm_state_insert(x);
        err = 0;
 
 out:
        spin_unlock_bh(&xfrm_state_lock);
 
-       if (!err)
-               xfrm_flush_all_bundles();
-
        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
@@ -974,8 +882,7 @@ out:
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;
 
-               if (!mod_timer(&x1->timer, jiffies + HZ))
-                       xfrm_state_hold(x1);
+               mod_timer(&x1->timer, jiffies + HZ);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);
 
@@ -1000,8 +907,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
                x->km.state = XFRM_STATE_EXPIRED;
-               if (!mod_timer(&x->timer, jiffies))
-                       xfrm_state_hold(x);
+               mod_timer(&x->timer, jiffies);
                return -EINVAL;
        }
 
@@ -1039,7 +945,7 @@ err:
 EXPORT_SYMBOL(xfrm_state_check);
 
 struct xfrm_state *
-xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
                  unsigned short family)
 {
        struct xfrm_state *x;
@@ -1163,7 +1069,7 @@ u32 xfrm_get_acqseq(void)
 EXPORT_SYMBOL(xfrm_get_acqseq);
 
 void
-xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
 {
        unsigned int h;
        struct xfrm_state *x0;
@@ -1180,10 +1086,10 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
-               minspi = ntohl(minspi);
-               maxspi = ntohl(maxspi);
-               for (h=0; h<maxspi-minspi+1; h++) {
-                       spi = minspi + net_random()%(maxspi-minspi+1);
+               u32 low = ntohl(minspi);
+               u32 high = ntohl(maxspi);
+               for (h=0; h<high-low+1; h++) {
+                       spi = low + net_random()%(high-low+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
@@ -1196,7 +1102,6 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-               xfrm_state_hold(x);
                spin_unlock_bh(&xfrm_state_lock);
                wake_up(&km_waitq);
        }
@@ -1207,7 +1112,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
                    void *data)
 {
        int i;
-       struct xfrm_state *x;
+       struct xfrm_state *x, *last = NULL;
        struct hlist_node *entry;
        int count = 0;
        int err = 0;
@@ -1215,24 +1120,22 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
-                       if (xfrm_id_proto_match(x->id.proto, proto))
-                               count++;
+                       if (!xfrm_id_proto_match(x->id.proto, proto))
+                               continue;
+                       if (last) {
+                               err = func(last, count, data);
+                               if (err)
+                                       goto out;
+                       }
+                       last = x;
+                       count++;
                }
        }
        if (count == 0) {
                err = -ENOENT;
                goto out;
        }
-
-       for (i = 0; i <= xfrm_state_hmask; i++) {
-               hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
-                       if (!xfrm_id_proto_match(x->id.proto, proto))
-                               continue;
-                       err = func(x, --count, data);
-                       if (err)
-                               goto out;
-               }
-       }
+       err = func(last, 0, data);
 out:
        spin_unlock_bh(&xfrm_state_lock);
        return err;
@@ -1283,10 +1186,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event)
        km_state_notify(x, &c);
 
        if (x->replay_maxage &&
-           !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
-               xfrm_state_hold(x);
+           !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                x->xflags &= ~XFRM_TIME_DEFER;
-       }
 }
 EXPORT_SYMBOL(xfrm_replay_notify);
 
@@ -1304,14 +1205,12 @@ static void xfrm_replay_timer_handler(unsigned long data)
        }
 
        spin_unlock(&x->lock);
-       xfrm_state_put(x);
 }
 
-int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
 {
        u32 diff;
-
-       seq = ntohl(seq);
+       u32 seq = ntohl(net_seq);
 
        if (unlikely(seq == 0))
                return -EINVAL;
@@ -1333,11 +1232,10 @@ int xfrm_replay_check(struct xfrm_state *x, u32 seq)
 }
 EXPORT_SYMBOL(xfrm_replay_check);
 
-void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 {
        u32 diff;
-
-       seq = ntohl(seq);
+       u32 seq = ntohl(net_seq);
 
        if (seq > x->replay.seq) {
                diff = seq - x->replay.seq;
@@ -1417,7 +1315,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
 }
 EXPORT_SYMBOL(km_query);
 
-int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
 {
        int err = -EINVAL;
        struct xfrm_mgr *km;
@@ -1674,13 +1572,13 @@ void __init xfrm_state_init(void)
 
        sz = sizeof(struct hlist_head) * 8;
 
-       xfrm_state_bydst = xfrm_state_hash_alloc(sz);
-       xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
-       xfrm_state_byspi = xfrm_state_hash_alloc(sz);
+       xfrm_state_bydst = xfrm_hash_alloc(sz);
+       xfrm_state_bysrc = xfrm_hash_alloc(sz);
+       xfrm_state_byspi = xfrm_hash_alloc(sz);
        if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
                panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
-       INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+       INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 }