git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net...
author Simon Horman <horms@verge.net.au>
Thu, 13 Jan 2011 01:29:21 +0000 (10:29 +0900)
committer Simon Horman <horms@verge.net.au>
Thu, 13 Jan 2011 01:29:21 +0000 (10:29 +0900)
54 files changed:
include/linux/ip_vs.h
include/linux/netfilter.h
include/linux/netfilter/xt_CT.h
include/linux/netfilter/xt_TCPOPTSTRIP.h
include/linux/netfilter/xt_TPROXY.h
include/linux/netfilter/xt_cluster.h
include/linux/netfilter/xt_quota.h
include/linux/netfilter/xt_time.h
include/linux/netfilter/xt_u32.h
include/linux/skbuff.h
include/net/ip_vs.h
include/net/netfilter/ipv6/nf_conntrack_ipv6.h
include/net/netfilter/ipv6/nf_defrag_ipv6.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_conntrack_extend.h
include/net/netfilter/nf_conntrack_l3proto.h
include/net/netfilter/nf_nat.h
include/net/netfilter/nf_nat_core.h
net/core/skbuff.c
net/ipv4/netfilter/ipt_LOG.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
net/ipv4/netfilter/nf_nat_amanda.c
net/ipv4/netfilter/nf_nat_core.c
net/ipv6/netfilter/ip6t_LOG.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_pe.c
net/netfilter/ipvs/ip_vs_pe_sip.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_proto_udp.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_extend.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_log.c
net/netfilter/nf_queue.c
net/netfilter/nfnetlink_log.c
net/netfilter/xt_CLASSIFY.c
net/netfilter/xt_NFQUEUE.c

index 5f43a3b2e3ad7149c13f2552987c8b5ce1749fcb..4deb3834d62c5621c2e3bc94869fe17198499d19 100644 (file)
 #define IP_VS_CONN_F_TEMPLATE  0x1000          /* template, not connection */
 #define IP_VS_CONN_F_ONE_PACKET        0x2000          /* forward only one packet */
 
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+                                 IP_VS_CONN_F_NOOUTPUT | \
+                                 IP_VS_CONN_F_INACTIVE | \
+                                 IP_VS_CONN_F_SEQ_MASK | \
+                                 IP_VS_CONN_F_NO_CPORT | \
+                                 IP_VS_CONN_F_TEMPLATE \
+                                )
+
 /* Flags that are not sent to backup server start from bit 16 */
 #define IP_VS_CONN_F_NFCT      (1 << 16)       /* use netfilter conntrack */
 
index 1893837b39660821351c4e744a166f0983244f7d..0ab7ca787b22058816889c856ed91266712f1878 100644 (file)
@@ -267,7 +267,7 @@ struct nf_afinfo {
        int             route_key_size;
 };
 
-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
 static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
 {
        return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +357,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #endif /*CONFIG_NETFILTER*/
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
 extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 #endif
index 1b564106891dbc9e4aa181f7bb95bc471202dadc..fbf4c56585547a20839aaaf334e2eb25726f7650 100644 (file)
@@ -4,11 +4,11 @@
 #define XT_CT_NOTRACK  0x1
 
 struct xt_ct_target_info {
-       u_int16_t       flags;
-       u_int16_t       zone;
-       u_int32_t       ct_events;
-       u_int32_t       exp_events;
-       char            helper[16];
+       __u16 flags;
+       __u16 zone;
+       __u32 ct_events;
+       __u32 exp_events;
+       char helper[16];
 
        /* Used internally by the kernel */
        struct nf_conn  *ct __attribute__((aligned(8)));
index 2db543214ff59b57b644c5abc1163086ef8ca732..342ef14b1761017c52072f83c4ad1ef134f3b4a0 100644 (file)
@@ -7,7 +7,7 @@
        (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
 
 struct xt_tcpoptstrip_target_info {
-       u_int32_t strip_bmap[8];
+       __u32 strip_bmap[8];
 };
 
 #endif /* _XT_TCPOPTSTRIP_H */
index 3f3d69361289ca4cecda2182f71d62e0b7137053..8097e0b4c15e78f42e75df6dcae2ed1b9e9eed2e 100644 (file)
@@ -5,15 +5,15 @@
  * redirection. We can get rid of that whenever we get support for
  * mutliple targets in the same rule. */
 struct xt_tproxy_target_info {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        __be32 laddr;
        __be16 lport;
 };
 
 struct xt_tproxy_target_info_v1 {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        union nf_inet_addr laddr;
        __be16 lport;
 };
index 886682656f098db6df2dd5cd0327cd5a76c5b6c1..66cfa3c782ac5963b4d857c0dc00bfe420b2370e 100644 (file)
@@ -6,10 +6,10 @@ enum xt_cluster_flags {
 };
 
 struct xt_cluster_match_info {
-       u_int32_t               total_nodes;
-       u_int32_t               node_mask;
-       u_int32_t               hash_seed;
-       u_int32_t               flags;
+       __u32 total_nodes;
+       __u32 node_mask;
+       __u32 hash_seed;
+       __u32 flags;
 };
 
 #define XT_CLUSTER_NODES_MAX   32
index b0d28c659ab75c7a87aa9e4cb956c516bf5aeaf1..8bda65f0bc92b70a1f8d069b3452f8a8f6bf85cd 100644 (file)
@@ -9,9 +9,9 @@ enum xt_quota_flags {
 struct xt_quota_priv;
 
 struct xt_quota_info {
-       u_int32_t               flags;
-       u_int32_t               pad;
-       aligned_u64             quota;
+       __u32 flags;
+       __u32 pad;
+       aligned_u64 quota;
 
        /* Used internally by the kernel */
        struct xt_quota_priv    *master;
index 14b6df412c9fa1674267be11f0c3ca698b5411aa..b8bd4568efdb944bfb00c817326bce333a60267a 100644 (file)
@@ -2,13 +2,13 @@
 #define _XT_TIME_H 1
 
 struct xt_time_info {
-       u_int32_t date_start;
-       u_int32_t date_stop;
-       u_int32_t daytime_start;
-       u_int32_t daytime_stop;
-       u_int32_t monthdays_match;
-       u_int8_t weekdays_match;
-       u_int8_t flags;
+       __u32 date_start;
+       __u32 date_stop;
+       __u32 daytime_start;
+       __u32 daytime_stop;
+       __u32 monthdays_match;
+       __u8 weekdays_match;
+       __u8 flags;
 };
 
 enum {
index 9947f56cdbddc6021b15c5e0a7641a39e1ef43f3..e8c3d8722baef7895f426e5cd7f7e9be60a58059 100644 (file)
@@ -9,13 +9,13 @@ enum xt_u32_ops {
 };
 
 struct xt_u32_location_element {
-       u_int32_t number;
-       u_int8_t nextop;
+       __u32 number;
+       __u8 nextop;
 };
 
 struct xt_u32_value_element {
-       u_int32_t min;
-       u_int32_t max;
+       __u32 min;
+       __u32 max;
 };
 
 /*
@@ -27,14 +27,14 @@ struct xt_u32_value_element {
 struct xt_u32_test {
        struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
        struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
-       u_int8_t nnums;
-       u_int8_t nvalues;
+       __u8 nnums;
+       __u8 nvalues;
 };
 
 struct xt_u32 {
        struct xt_u32_test tests[XT_U32_MAXSIZE+1];
-       u_int8_t ntests;
-       u_int8_t invert;
+       __u8 ntests;
+       __u8 invert;
 };
 
 #endif /* _XT_U32_H */
index 20ec0a64cb9ff0f8708a66ddaeb2ffd1d3ed72c9..bf221d65d9ad5d0c2878795021b2733012c4ef2f 100644 (file)
@@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
+    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
+#endif
+
 /** 
  *     struct sk_buff - socket buffer
  *     @next: Next buffer in list
@@ -362,6 +367,8 @@ struct sk_buff {
        void                    (*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        struct nf_conntrack     *nfct;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
        struct sk_buff          *nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -2057,6 +2064,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
        if (nfct)
                atomic_inc(&nfct->use);
 }
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
 {
        if (skb)
@@ -2085,6 +2094,8 @@ static inline void nf_reset(struct sk_buff *skb)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
        nf_conntrack_put_reasm(skb->nfct_reasm);
        skb->nfct_reasm = NULL;
 #endif
@@ -2101,6 +2112,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
        dst->nfct = src->nfct;
        nf_conntrack_get(src->nfct);
        dst->nfctinfo = src->nfctinfo;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
        dst->nfct_reasm = src->nfct_reasm;
        nf_conntrack_get_reasm(src->nfct_reasm);
 #endif
@@ -2114,6 +2127,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(dst->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
        nf_conntrack_put_reasm(dst->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
index b7bbd6c28cfa17dde6fa3a972d33635c5a498312..d858264217ba7bffa5ea873028b660de98477710 100644 (file)
@@ -382,6 +382,7 @@ struct ip_vs_conn {
        union nf_inet_addr       vaddr;          /* virtual address */
        union nf_inet_addr       daddr;          /* destination address */
        volatile __u32           flags;          /* status flags */
+       __u32                    fwmark;         /* Firewall mark from skb */
        __be16                   cport;
        __be16                   vport;
        __be16                   dport;
@@ -422,6 +423,7 @@ struct ip_vs_conn {
        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 
+       const struct ip_vs_pe   *pe;
        char                    *pe_data;
        __u8                    pe_data_len;
 };
@@ -719,7 +721,7 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
                                  const union nf_inet_addr *daddr,
                                  __be16 dport, unsigned flags,
-                                 struct ip_vs_dest *dest);
+                                 struct ip_vs_dest *dest, __u32 fwmark);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
 extern const char * ip_vs_state_name(__u16 proto, int state);
@@ -814,8 +816,20 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
 void ip_vs_unbind_pe(struct ip_vs_service *svc);
 int register_ip_vs_pe(struct ip_vs_pe *pe);
 int unregister_ip_vs_pe(struct ip_vs_pe *pe);
-extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
-extern void ip_vs_pe_put(struct ip_vs_pe *pe);
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
+
+static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               __module_get(pe->module);
+}
+
+static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               module_put(pe->module);
+}
 
 /*
  *     IPVS protocol functions (from ip_vs_proto.c)
@@ -869,7 +883,9 @@ extern int sysctl_ip_vs_conntrack;
 extern int sysctl_ip_vs_snat_reroute;
 extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
 
+extern void ip_vs_sync_switch_mode(int mode);
 extern struct ip_vs_service *
 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport);
@@ -889,7 +905,8 @@ extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
 ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
-               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol,
+               __u32 fwmark);
 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
index 1ee717eb5b099f6fa1fc782506c05e0dd59addb8..a4c99368579509387e6ae75eff33383db5e3f8a7 100644 (file)
@@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-                              struct net_device *in,
-                              struct net_device *out,
-                              int (*okfn)(struct sk_buff *));
-
-struct inet_frags_ctl;
-
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
 
index 94dd54d76b48d215729059d53b9e6a983070138a..fd79c9a1779d19d6a5dd54ef7380b990763a00ea 100644 (file)
@@ -3,4 +3,14 @@
 
 extern void nf_defrag_ipv6_enable(void);
 
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+                              struct net_device *in,
+                              struct net_device *out,
+                              int (*okfn)(struct sk_buff *));
+
+struct inet_frags_ctl;
+
 #endif /* _NF_DEFRAG_IPV6_H */
index d85cff10e1693535f326b7467330b985734d2309..2bc344c98215e436ca62adb1973e62a009a8415c 100644 (file)
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
 /* per conntrack: application helper private data */
 union nf_conntrack_help {
        /* insert conntrack helper private data (master) here */
+#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
        struct nf_ct_ftp_master ct_ftp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
+    defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
        struct nf_ct_pptp_master ct_pptp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_H323) || \
+    defined(CONFIG_NF_CONNTRACK_H323_MODULE)
        struct nf_ct_h323_master ct_h323_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SANE) || \
+    defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
        struct nf_ct_sane_master ct_sane_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
        struct nf_ct_sip_master ct_sip_info;
+#endif
 };
 
 #include <linux/types.h>
@@ -116,14 +129,14 @@ struct nf_conn {
        u_int32_t secmark;
 #endif
 
-       /* Storage reserved for other modules: */
-       union nf_conntrack_proto proto;
-
        /* Extensions */
        struct nf_ct_ext *ext;
 #ifdef CONFIG_NET_NS
        struct net *ct_net;
 #endif
+
+       /* Storage reserved for other modules, must be the last member */
+       union nf_conntrack_proto proto;
 };
 
 static inline struct nf_conn *
index 96ba5f7dcab60ad26618fa028e5968ecb83310e8..8fdb04b8cce0cef01b5e61529348116b9f42b108 100644 (file)
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_find(const struct nf_conn *ct)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+#else
+       return NULL;
+#endif
 }
 
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_ecache *e;
 
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
                e->expmask = expmask;
        }
        return e;
+#else
+       return NULL;
+#endif
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
        int (*fcn)(unsigned int events, struct nf_ct_event *item);
 };
 
-extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
        int (*fcn)(unsigned int events, struct nf_exp_event *item);
 };
 
-extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
 extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
 extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
index 0772d296dfdb12b26a4bd8d97fecdd914c088536..1a9f96db37982cffe768dbf9c5390829bff3625e 100644 (file)
@@ -7,10 +7,16 @@
 
 enum nf_ct_ext_id {
        NF_CT_EXT_HELPER,
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        NF_CT_EXT_NAT,
+#endif
        NF_CT_EXT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        NF_CT_EXT_ECACHE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
        NF_CT_EXT_ZONE,
+#endif
        NF_CT_EXT_NUM,
 };
 
index a7547611e8f17389a2d95df481e4232315531df3..e8010f445ae1318f9a6f1e55d156313aa2af70a3 100644 (file)
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
        struct module *me;
 };
 
-extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
 
 /* Protocol registration. */
 extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
index f5f09f032a90fa4fab8c849e9d22302542328398..aff80b190c1231a8eb5cae1b61b34007c833d870 100644 (file)
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help {
        /* insert nat helper private data here */
+#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
        struct nf_nat_pptp nat_pptp_info;
+#endif
 };
 
 struct nf_conn;
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+#else
+       return NULL;
+#endif
 }
 
 #else  /* !__KERNEL__: iptables wants this to compile. */
index 33602ab66190c20e8de7cb70d284bae9f9d92e59..5aec85c29979ff636a30ac095c6e11bccccdd1c4 100644 (file)
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
                                     enum nf_nat_manip_type manip)
 {
        if (manip == IP_NAT_MANIP_SRC)
-               return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_SRC_NAT_DONE_BIT;
        else
-               return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_DST_NAT_DONE_BIT;
 }
 
 struct nlattr;
index 19d6c21220fd47bb7141d8376a55c1785627a5e8..d31bb36ae0dc21cfdab61b90dd2f2fbfd665ce81 100644 (file)
@@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb)
        }
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
        nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
index 72ffc8fda2e9faca3ab8c4cf3682df0de4a44faa..d76d6c9ed9468263547a918936a76a1a9526b368 100644 (file)
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
        }
 #endif
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, 0);
index 63f60fc5d26a49ea844c106e8570260b29445a88..5585980fce2e351b35d2e6f0f741531f6a87d199 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
        struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
        for (st->bucket = 0;
             st->bucket < net->ct.htable_size;
             st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                if (!is_a_nulls(n))
                        return n;
        }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        if (++st->bucket >= net->ct.htable_size)
                                return NULL;
                }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
        }
        return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
        struct hlist_node *n;
 
        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
 }
index 0f23b3f06df05e7643e1cd337325955dc6942794..703f366fd2358a3dd32c813700511ffb5129f351 100644 (file)
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
        /* Try to get same port: if not, try to change it. */
        for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-               int ret;
+               int res;
 
                exp->tuple.dst.u.tcp.port = htons(port);
-               ret = nf_ct_expect_related(exp);
-               if (ret == 0)
+               res = nf_ct_expect_related(exp);
+               if (res == 0)
                        break;
-               else if (ret != -EBUSY) {
+               else if (res != -EBUSY) {
                        port = 0;
                        break;
                }
index c04787ce1a71203e1346830450b0a130e358defc..eb55835a02c380e7d0b961cfc047f5fea5b4ad52 100644 (file)
@@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
        /* It's done. */
        if (maniptype == IP_NAT_MANIP_DST)
-               set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_DST_NAT_DONE_BIT;
        else
-               set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_SRC_NAT_DONE_BIT;
 
        return NF_ACCEPT;
 }
@@ -502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
        int ret = 0;
 
        spin_lock_bh(&nf_nat_lock);
-       if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+       if (rcu_dereference_protected(
+                       nf_nat_protos[proto->protonum],
+                       lockdep_is_held(&nf_nat_lock)
+                       ) != &nf_nat_unknown_protocol) {
                ret = -EBUSY;
                goto out;
        }
index 09c88891a753e725d8594edc68941b293d171100..05027b753721764b14035d04241be33d9bc47329 100644 (file)
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
               in ? in->name : "",
               out ? out->name : "");
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
index 79d43aa8fa8da80b405689bf7c751d7a9cc89ad4..66e003e1fcd595c009c78063df4fecd10aa46496 100644 (file)
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_frag6_timeout",
                .data           = &nf_init_frags.timeout,
index 99abfb53bab91def61a8d7cdeaf2ee64ed8aa8f6..97c5b21b9674f3fb005da40235e78303f6323a9a 100644 (file)
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 {
        u16 zone = NF_CT_DEFAULT_ZONE;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        if (skb->nfct)
                zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
 
 #ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge &&
@@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 {
        struct sk_buff *reasm;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        /* Previously seen (loopback)?  */
        if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
                return NF_ACCEPT;
+#endif
 
        reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
        /* queued */
index 32fcbe290c047802942b5d9838489a2193b622b2..e69d537362c72bb7d1436f85ce57cdf589719fb1 100644 (file)
@@ -214,7 +214,7 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +231,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 EXPORT_SYMBOL(nf_ct_destroy);
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct)
index e9adecdc8ca4779468c494c1a2418049ca2384eb..66e4662925d5cce7e560f119ff8811310b6e7d36 100644 (file)
 /*
  * Connection hash size. Default is what was selected at compile time.
 */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 
 /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
 
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
 
 /*  SLAB cache for IPVS connections */
 static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
@@ -71,7 +71,7 @@ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
 
 /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
 
 /*
  *  Fine locking granularity for big connection hash table
@@ -176,8 +176,8 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
        ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
                              NULL, 0, &p);
 
-       if (cp->dest && cp->dest->svc->pe) {
-               p.pe = cp->dest->svc->pe;
+       if (cp->pe) {
+               p.pe = cp->pe;
                p.pe_data = cp->pe_data;
                p.pe_data_len = cp->pe_data_len;
        }
@@ -354,7 +354,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (p->pe_data && p->pe->ct_match) {
-                       if (p->pe->ct_match(p, cp))
+                       if (p->pe == cp->pe && p->pe->ct_match(p, cp))
                                goto out;
                        continue;
                }
@@ -613,7 +613,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
        if ((cp) && (!cp->dest)) {
                dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
                                       &cp->vaddr, cp->vport,
-                                      cp->protocol);
+                                      cp->protocol, cp->fwmark);
                ip_vs_bind_dest(cp, dest);
                return dest;
        } else
@@ -765,6 +765,7 @@ static void ip_vs_conn_expire(unsigned long data)
                if (cp->flags & IP_VS_CONN_F_NFCT)
                        ip_vs_conn_drop_conntrack(cp);
 
+               ip_vs_pe_put(cp->pe);
                kfree(cp->pe_data);
                if (unlikely(cp->app != NULL))
                        ip_vs_unbind_app(cp);
@@ -802,7 +803,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
               const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-              struct ip_vs_dest *dest)
+              struct ip_vs_dest *dest, __u32 fwmark)
 {
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
@@ -826,7 +827,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
                        &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags          = flags;
-       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+       cp->fwmark         = fwmark;
+       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+               ip_vs_pe_get(p->pe);
+               cp->pe = p->pe;
                cp->pe_data = p->pe_data;
                cp->pe_data_len = p->pe_data_len;
        }
@@ -958,15 +962,13 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
                size_t len = 0;
 
-               if (cp->dest && cp->pe_data &&
-                   cp->dest->svc->pe->show_pe_data) {
+               if (cp->pe_data) {
                        pe_data[0] = ' ';
-                       len = strlen(cp->dest->svc->pe->name);
-                       memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+                       len = strlen(cp->pe->name);
+                       memcpy(pe_data + 1, cp->pe->name, len);
                        pe_data[len + 1] = ' ';
                        len += 2;
-                       len += cp->dest->svc->pe->show_pe_data(cp,
-                                                              pe_data + len);
+                       len += cp->pe->show_pe_data(cp, pe_data + len);
                }
                pe_data[len] = '\0';
 
index b4e51e9c5a04ad4e1314a338529cc0284407feb3..5287771d0647ba4a40887ad41fde97ab17fcde90 100644 (file)
@@ -177,7 +177,7 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
        return pp->state_transition(cp, direction, skb, pp);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
                              struct sk_buff *skb, int protocol,
                              const union nf_inet_addr *caddr, __be16 cport,
@@ -187,7 +187,9 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
        ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
        p->pe = svc->pe;
        if (p->pe && p->pe->fill_param)
-               p->pe->fill_param(p, skb);
+               return p->pe->fill_param(p, skb);
+
+       return 0;
 }
 
 /*
@@ -200,7 +202,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
                    struct sk_buff *skb,
-                   __be16 ports[2])
+                   __be16 src_port, __be16 dst_port, int *ignored)
 {
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_iphdr iph;
@@ -224,8 +226,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
        IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
                      "mnet %s\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
                      IP_VS_DBG_ADDR(svc->af, &snet));
 
        /*
@@ -247,14 +249,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
                __be16 vport = 0;
 
-               if (ports[1] == svc->port) {
+               if (dst_port == svc->port) {
                        /* non-FTP template:
                         * <protocol, caddr, 0, vaddr, vport, daddr, dport>
                         * FTP template:
                         * <protocol, caddr, 0, vaddr, 0, daddr, 0>
                         */
                        if (svc->port != FTPPORT)
-                               vport = ports[1];
+                               vport = dst_port;
                } else {
                        /* Note: persistent fwmark-based services and
                         * persistent port zero service are handled here.
@@ -268,24 +270,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                vaddr = &fwmark;
                        }
                }
-               ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-                                             vaddr, vport, &param);
+               /* return *ignored = -1 so NF_DROP can be used */
+               if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+                                                 vaddr, vport, &param) < 0) {
+                       *ignored = -1;
+                       return NULL;
+               }
        }
 
        /* Check if a template already exists */
        ct = ip_vs_ct_in_get(&param);
        if (!ct || !ip_vs_check_template(ct)) {
-               /* No template found or the dest of the connection
+               /*
+                * No template found or the dest of the connection
                 * template is not available.
+                * return *ignored=0 i.e. ICMP and NF_DROP
                 */
                dest = svc->scheduler->schedule(svc, skb);
                if (!dest) {
                        IP_VS_DBG(1, "p-schedule: no dest found.\n");
                        kfree(param.pe_data);
+                       *ignored = 0;
                        return NULL;
                }
 
-               if (ports[1] == svc->port && svc->port != FTPPORT)
+               if (dst_port == svc->port && svc->port != FTPPORT)
                        dport = dest->port;
 
                /* Create a template
@@ -293,9 +302,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * and thus param.pe_data will be destroyed
                 * when the template expires */
                ct = ip_vs_conn_new(&param, &dest->addr, dport,
-                                   IP_VS_CONN_F_TEMPLATE, dest);
+                                   IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
                if (ct == NULL) {
                        kfree(param.pe_data);
+                       *ignored = -1;
                        return NULL;
                }
 
@@ -306,7 +316,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                kfree(param.pe_data);
        }
 
-       dport = ports[1];
+       dport = dst_port;
        if (dport == svc->port && dest->port)
                dport = dest->port;
 
@@ -317,11 +327,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        /*
         *    Create a new connection according to the template
         */
-       ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-                             &iph.daddr, ports[1], &param);
-       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+       ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
+                             &iph.daddr, dst_port, &param);
+
+       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
        if (cp == NULL) {
                ip_vs_conn_put(ct);
+               *ignored = -1;
                return NULL;
        }
 
@@ -341,6 +353,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler can not find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
  */
 struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
@@ -371,11 +398,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        }
 
        /*
-        * Do not schedule replies from local real server. It is risky
-        * for fwmark services but mostly for persistent services.
+        *    Do not schedule replies from local real server.
         */
        if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-           (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
            (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
                IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
                              "Not scheduling reply for existing connection");
@@ -386,10 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        /*
         *    Persistent service
         */
-       if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-               *ignored = 0;
-               return ip_vs_sched_persist(svc, skb, pptr);
-       }
+       if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+               return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+       *ignored = 0;
 
        /*
         *    Non-persistent service
@@ -402,8 +427,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                return NULL;
        }
 
-       *ignored = 0;
-
        dest = svc->scheduler->schedule(svc, skb);
        if (dest == NULL) {
                IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -423,9 +446,11 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                                      pptr[0], &iph.daddr, pptr[1], &p);
                cp = ip_vs_conn_new(&p, &dest->addr,
                                    dest->port ? dest->port : pptr[1],
-                                   flags, dest);
-               if (!cp)
+                                   flags, dest, skb->mark);
+               if (!cp) {
+                       *ignored = -1;
                        return NULL;
+               }
        }
 
        IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -489,7 +514,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                                              &iph.daddr, pptr[1], &p);
                        cp = ip_vs_conn_new(&p, &daddr, 0,
                                            IP_VS_CONN_F_BYPASS | flags,
-                                           NULL);
+                                           NULL, skb->mark);
                        if (!cp)
                                return NF_DROP;
                }
@@ -1535,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         *
         * Sync connection if it is about to close to
         * encorage the standby servers to update the connections timeout
+        *
+        * For ONE_PKT let ip_vs_sync_conn() do the filter work.
         */
-       pkts = atomic_add_return(1, &cp->in_pkts);
-       if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               pkts = sysctl_ip_vs_sync_threshold[0];
+       else
+               pkts = atomic_add_return(1, &cp->in_pkts);
+
+       if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
            cp->protocol == IPPROTO_SCTP) {
                if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
                        (pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1552,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        /* Keep this block last: TCP and others with pp->num_states <= 1 */
-       else if (af == AF_INET &&
-           (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
            (((cp->protocol != IPPROTO_TCP ||
               cp->state == IP_VS_TCP_S_ESTABLISHED) &&
              (pkts % sysctl_ip_vs_sync_threshold[1]
index 22f7ad5101abb32d24af2d254dcd3528501d6553..ca49e928f30220f242c81f3263a6148b13a6e669 100644 (file)
@@ -92,7 +92,7 @@ int sysctl_ip_vs_nat_icmp_send = 0;
 int sysctl_ip_vs_conntrack;
 #endif
 int sysctl_ip_vs_snat_reroute = 1;
-
+int sysctl_ip_vs_sync_ver = 1;         /* Default version of sync proto */
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -655,12 +655,12 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
                                   __be16 dport,
                                   const union nf_inet_addr *vaddr,
-                                  __be16 vport, __u16 protocol)
+                                  __be16 vport, __u16 protocol, __u32 fwmark)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_service *svc;
 
-       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
        if (!svc)
                return NULL;
        dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -1137,7 +1137,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        }
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1248,7 +1248,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
        old_sched = sched;
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1534,6 +1534,25 @@ proc_do_sync_threshold(ctl_table *table, int write,
        return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;
+       int rc;
+
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               if ((*valp < 0) || (*valp > 1)) {
+                       /* Restore the correct value */
+                       *valp = val;
+               } else {
+                       ip_vs_sync_switch_mode(val);
+               }
+       }
+       return rc;
+}
 
 /*
  *     IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
@@ -1600,6 +1619,13 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .procname       = "sync_version",
+               .data           = &sysctl_ip_vs_sync_ver,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_sync_mode,
+       },
 #if 0
        {
                .procname       = "timeout_established",
index 75455000ad1c1cde82b2134970ab3a67a5a97b82..84aef65b37d1be59fa1d17698a0ae5cf73414e59 100644 (file)
@@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                        n_cp = ip_vs_conn_new(&p, &from, port,
                                              IP_VS_CONN_F_NO_CPORT |
                                              IP_VS_CONN_F_NFCT,
-                                             cp->dest);
+                                             cp->dest, skb->mark);
                        if (!n_cp)
                                return 0;
 
@@ -365,7 +365,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
                if (!n_cp) {
                        n_cp = ip_vs_conn_new(&p, &cp->daddr,
                                              htons(ntohs(cp->dport)-1),
-                                             IP_VS_CONN_F_NFCT, cp->dest);
+                                             IP_VS_CONN_F_NFCT, cp->dest,
+                                             skb->mark);
                        if (!n_cp)
                                return 0;
 
index 3414af70ee127c0f30f7b49cf0c15d91683bde5b..5cf859ccb31bbe096c9588643a9ed0f0d5f13342 100644 (file)
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
 }
 
 /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
 {
        struct ip_vs_pe *pe;
 
-       IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+       IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
                  pe_name);
 
        spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
 }
 
 /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
 {
        struct ip_vs_pe *pe;
 
        /* Search for the pe by name */
-       pe = ip_vs_pe_getbyname(name);
+       pe = __ip_vs_pe_getbyname(name);
 
        /* If pe not found, load the module and search again */
        if (!pe) {
                request_module("ip_vs_pe_%s", name);
-               pe = ip_vs_pe_getbyname(name);
+               pe = __ip_vs_pe_getbyname(name);
        }
 
        return pe;
 }
 
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-       if (pe && pe->module)
-               module_put(pe->module);
-}
-
 /* Register a pe in the pe list */
 int register_ip_vs_pe(struct ip_vs_pe *pe)
 {
index b8b4e9620f3e936251da71d452f1d1f7aed68d04..0d83bc01fed4c14d7323debd536c5ca738cd5954 100644 (file)
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
        struct ip_vs_iphdr iph;
        unsigned int dataoff, datalen, matchoff, matchlen;
        const char *dptr;
+       int retc;
 
        ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
 
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
        if (dataoff >= skb->len)
                return -EINVAL;
 
+       if ((retc=skb_linearize(skb)) < 0)
+               return retc;
        dptr = skb->data + dataoff;
        datalen = skb->len - dataoff;
 
index 1ea96bcd342b8fc81eb437a6826fc6652c2be4c2..a315159983ad773053567e3fed5a17ec9e566309 100644 (file)
@@ -47,13 +47,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * incoming connection, and create a connection entry.
                 */
                *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pp);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
-
+       /* NF_ACCEPT */
        return 1;
 }
 
index f6c5200e214663fe915b2136532c54d03861e5eb..1cdab12abfef333e4f3d7765b96873d3b39082d0 100644 (file)
@@ -64,12 +64,18 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * incoming connection, and create a connection entry.
                 */
                *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pp);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
+       /* NF_ACCEPT */
        return 1;
 }
 
index 9d106a06bb0a46376252b32f2d30882d921b8b16..cd398de010cc8715fa587955b1ec0c557dce6c55 100644 (file)
@@ -63,12 +63,18 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * incoming connection, and create a connection entry.
                 */
                *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pp);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
+       /* NF_ACCEPT */
        return 1;
 }
 
index ab85aedea17eea6100eb1aefe48b028d371f29d2..c1c167ab73eed9725414c26229907bec12f02cdf 100644 (file)
@@ -5,6 +5,18 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
+ * Version 1,   is capable of handling both version 0 and 1 messages.
+ *              Version 0 is the plain old format.
+ *              Note Version 0 receivers will just drop Ver 1 messages.
+ *              Version 1 is capable of handling IPv6, Persistence data,
+ *              time-outs, and firewall marks.
+ *              In ver. 1 "ip_vs_sync_conn_options" will be sent in network order.
+ *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions  Message: is a complete datagram
+ *              Sync_conn: is a part of a Message
+ *              Param Data is an option to a Sync_conn.
+ *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * ip_vs_sync:  sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
  *     Alexandre Cassen        :       Added SyncID support for incoming sync
  *                                     messages filtering.
  *     Justin Ossevoort        :       Fix endian problem on sync message size.
+ *     Hans Schillstrom        :       Added Version 1: i.e. IPv6,
+ *                                     Persistence support, fwmark and time-out.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
 #include <linux/wait.h>
 #include <linux/kernel.h>
 
+#include <asm/unaligned.h>             /* Used for ntoh_seq and hton_seq */
+
 #include <net/ip.h>
 #include <net/sock.h>
 
 #define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
 #define IP_VS_SYNC_PORT  8848          /* multicast port */
 
+#define SYNC_PROTO_VER  1              /* Protocol version in header */
 
 /*
  *     IPVS sync connection entry
+ *     Version 0, i.e. original version.
  */
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
        __u8                    reserved;
 
        /* Protocol, addresses and port numbers */
@@ -71,41 +89,158 @@ struct ip_vs_sync_conn_options {
        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 };
 
+/*
+     Sync Connection format (sync_conn)
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |    Type       |    Protocol   | Ver.  |        Size           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             Flags                             |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            State              |         cport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            vport              |         dport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             fwmark                            |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             timeout  (in sec.)                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                              ...                              |
+      |                        IP-Addresses  (v4 or v6)               |
+      |                              ...                              |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  Optional Parameters.
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | Param. Type    | Param. Length |   Param. data                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
+      |                              ...                              |
+      |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                               | Param Type    | Param. Length |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                           Param  data                         |
+      |         Last Param data should be padded for 32 bit alignment |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ *  Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       __be32                  caddr;          /* client address */
+       __be32                  vaddr;          /* virtual address */
+       __be32                  daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       struct in6_addr         caddr;          /* client address */
+       struct in6_addr         vaddr;          /* virtual address */
+       struct in6_addr         daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+       struct ip_vs_sync_v4    v4;
+       struct ip_vs_sync_v6    v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6            0
+#define STYPE_F_INET6          (1 << STYPE_INET6)
+
+#define SVER_SHIFT             12              /* Shift to get version */
+#define SVER_MASK              0x0fff          /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA      1
+#define IPVS_OPT_PE_DATA       2
+#define IPVS_OPT_PE_NAME       3
+#define IPVS_OPT_PARAM         7
+
+#define IPVS_OPT_F_SEQ_DATA    (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA     (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME     (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM       (1 << (IPVS_OPT_PARAM-1))
+
 struct ip_vs_sync_thread_data {
        struct socket *sock;
        char *buf;
 };
 
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
 #define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
 
 
 /*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
+  The master multicasts messages (Datagrams) to the backup load balancers
+  in the following format.
+
+ Version 1:
+  Note, first byte should be Zero, so ver 0 receivers will drop the packet.
 
        0                   1                   2                   3
        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
+      |      0        |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    Version    |    Reserved, set to Zero      |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (1)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                            .                                  |
-      |                            .                                  |
+      ~                            .                                  ~
       |                            .                                  |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (n)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                    IPVS Sync Connection (1)                   |
 */
 
 #define SYNC_MESG_HEADER_LEN   4
 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
 
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
        __u8                    nr_conns;
        __u8                    syncid;
        __u16                   size;
@@ -113,6 +248,17 @@ struct ip_vs_sync_mesg {
        /* ip_vs_sync_conn entries start here */
 };
 
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+       __u8                    reserved;       /* must be zero */
+       __u8                    syncid;
+       __u16                   size;
+       __u8                    nr_conns;
+       __s8                    version;        /* SYNC_PROTO_VER  */
+       __u16                   spare;
+       /* ip_vs_sync_conn entries start here */
+};
+
 /* the maximum length of sync (sending/receiving) message */
 static int sync_send_mesg_maxlen;
 static int sync_recv_mesg_maxlen;
@@ -156,6 +302,27 @@ static struct sockaddr_in mcast_addr = {
        .sin_addr.s_addr        = cpu_to_be32(IP_VS_SYNC_GROUP),
 };
 
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ *
+ * @no: source, big-endian fields, may be unaligned (read only)
+ * @ho: destination, host-order fields
+ */
+static void ntoh_seq(const struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+       ho->init_seq       = get_unaligned_be32(&no->init_seq);
+       ho->delta          = get_unaligned_be32(&no->delta);
+       ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ *
+ * @ho: source, host-order fields (read only)
+ * @no: destination, written as big-endian, may be unaligned
+ *
+ * Note the argument order: the host-order source comes FIRST.
+ */
+static void hton_seq(const struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+       put_unaligned_be32(ho->init_seq, &no->init_seq);
+       put_unaligned_be32(ho->delta, &no->delta);
+       put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
 
 static inline struct ip_vs_sync_buff *sb_dequeue(void)
 {
@@ -175,6 +342,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
        return sb;
 }
 
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
 static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
 {
        struct ip_vs_sync_buff *sb;
@@ -186,11 +356,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
                kfree(sb);
                return NULL;
        }
-       sb->mesg->nr_conns = 0;
+       sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+       sb->mesg->version = SYNC_PROTO_VER;
        sb->mesg->syncid = ip_vs_master_syncid;
-       sb->mesg->size = 4;
-       sb->head = (unsigned char *)sb->mesg + 4;
+       sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
+       sb->mesg->nr_conns = 0;
+       sb->mesg->spare = 0;
+       sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
        sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+
        sb->firstuse = jiffies;
        return sb;
 }
@@ -231,20 +405,77 @@ get_curr_sync_buff(unsigned long time)
        return sb;
 }
 
+/*
+ * Switch mode from sending version 0 or 1
+ *  - must handle sync_buf
+ *
+ * Does nothing unless this host is a sync master and @mode differs from
+ * sysctl_ip_vs_sync_ver.  A pending buffer that holds no more than a header
+ * is dropped; otherwise it is queued for sending (or dropped if the master
+ * state was cleared meanwhile).  In every case curr_sb is reset so that the
+ * next connection allocates a buffer for the new protocol version.
+ */
+void ip_vs_sync_switch_mode(int mode)
+{
+       /* Parentheses are required: '!' binds tighter than '&' */
+       if (!(ip_vs_sync_state & IP_VS_STATE_MASTER))
+               return;
+       if (mode == sysctl_ip_vs_sync_ver || !curr_sb)
+               return;
+
+       spin_lock_bh(&curr_sb_lock);
+       /* curr_sb may have been queued by a sender since the unlocked test */
+       if (!curr_sb)
+               goto unlock;
+
+       /* Buffer empty ? then let buf_create do the job  */
+       if (curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+               /*
+                * sb->mesg is a separate allocation, so a bare kfree(curr_sb)
+                * would leak it; use the release helper that the non-master
+                * path below already relies on.
+                */
+               ip_vs_sync_buff_release(curr_sb);
+       } else {
+               spin_lock_bh(&ip_vs_sync_lock);
+               if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+                       list_add_tail(&curr_sb->list, &ip_vs_sync_queue);
+               else
+                       ip_vs_sync_buff_release(curr_sb);
+               spin_unlock_bh(&ip_vs_sync_lock);
+       }
+       /* The buffer now belongs to the queue or is gone: never reuse it */
+       curr_sb = NULL;
+unlock:
+       spin_unlock_bh(&curr_sb_lock);
+}
 
 /*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
+{
+       struct ip_vs_sync_buff *sb;
+       struct ip_vs_sync_mesg_v0 *mesg;
+
+       sb = kmalloc(sizeof(*sb), GFP_ATOMIC);
+       if (!sb)
+               return NULL;
+
+       /* The message area is allocated separately from the descriptor */
+       sb->mesg = kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC);
+       if (!sb->mesg) {
+               kfree(sb);
+               return NULL;
+       }
+       /* sb->mesg is typed as the version 1 header; view it as version 0 */
+       mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+       mesg->nr_conns = 0;
+       mesg->syncid = ip_vs_master_syncid;
+       /* An empty message consists of the 4-byte version 0 header only */
+       mesg->size = SYNC_MESG_HEADER_LEN;
+       sb->head = (unsigned char *)mesg + SYNC_MESG_HEADER_LEN;
+       sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen;
+       sb->firstuse = jiffies;
+       return sb;
+}
+
+/*
+ *      Version 0, can be switched in by sysctl.
  *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
 {
-       struct ip_vs_sync_mesg *m;
-       struct ip_vs_sync_conn *s;
+       struct ip_vs_sync_mesg_v0 *m;
+       struct ip_vs_sync_conn_v0 *s;
        int len;
 
+       if (unlikely(cp->af != AF_INET))
+               return;
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               return;
+
        spin_lock(&curr_sb_lock);
        if (!curr_sb) {
-               if (!(curr_sb=ip_vs_sync_buff_create())) {
+               if (!(curr_sb=ip_vs_sync_buff_create_v0())) {
                        spin_unlock(&curr_sb_lock);
                        pr_err("ip_vs_sync_buff_create failed.\n");
                        return;
@@ -253,10 +484,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
        len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
                SIMPLE_CONN_SIZE;
-       m = curr_sb->mesg;
-       s = (struct ip_vs_sync_conn *)curr_sb->head;
+       m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg;
+       s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
 
        /* copy members */
+       s->reserved = 0;
        s->protocol = cp->protocol;
        s->cport = cp->cport;
        s->vport = cp->vport;
@@ -277,7 +509,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
        curr_sb->head += len;
 
        /* check if there is a space for next one */
-       if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+       if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) {
                sb_queue_tail(curr_sb);
                curr_sb = NULL;
        }
@@ -288,69 +520,343 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
                ip_vs_sync_conn(cp->control);
 }
 
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ *      Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+       struct ip_vs_sync_mesg *m;
+       union ip_vs_sync_conn *s;
+       __u8 *p;
+       unsigned int len, pe_name_len, pad;
+
+       /* Handle old version of the protocol */
+       if (sysctl_ip_vs_sync_ver == 0) {
+               ip_vs_sync_conn_v0(cp);
+               return;
+       }
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               goto control;
+sloop:
+       /* Sanity checks */
+       pe_name_len = 0;
+       if (cp->pe_data_len) {
+               if (!cp->pe_data || !cp->dest) {
+                       IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+                       return;
+               }
+               pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+       }
+
+       spin_lock(&curr_sb_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               len = sizeof(struct ip_vs_sync_v6);
+       else
+#endif
+               len = sizeof(struct ip_vs_sync_v4);
+
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+               len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+       if (cp->pe_data_len)
+               len += cp->pe_data_len + 2;     /* + Param hdr field */
+       if (pe_name_len)
+               len += pe_name_len + 2;
+
+       /* check if there is a space for this one  */
+       pad = 0;
+       if (curr_sb) {
+               pad = (4 - (size_t)curr_sb->head) & 3;
+               if (curr_sb->head + len + pad > curr_sb->end) {
+                       sb_queue_tail(curr_sb);
+                       curr_sb = NULL;
+                       pad = 0;
+               }
+       }
+
+       if (!curr_sb) {
+               if (!(curr_sb=ip_vs_sync_buff_create())) {
+                       spin_unlock(&curr_sb_lock);
+                       pr_err("ip_vs_sync_buff_create failed.\n");
+                       return;
+               }
+       }
+
+       m = curr_sb->mesg;
+       p = curr_sb->head;
+       curr_sb->head += pad + len;
+       m->size += pad + len;
+       /* Add ev. padding from prev. sync_conn */
+       while (pad--)
+               *(p++) = 0;
+
+       s = (union ip_vs_sync_conn *)p;
+
+       /* Set message type  & copy members */
+       s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+       s->v4.ver_size = htons(len & SVER_MASK);        /* Version 0 */
+       s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+       s->v4.state = htons(cp->state);
+       s->v4.protocol = cp->protocol;
+       s->v4.cport = cp->cport;
+       s->v4.vport = cp->vport;
+       s->v4.dport = cp->dport;
+       s->v4.fwmark = htonl(cp->fwmark);
+       s->v4.timeout = htonl(cp->timeout / HZ);
+       m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6) {
+               p += sizeof(struct ip_vs_sync_v6);
+               ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+               ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+               ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+       } else
+#endif
+       {
+               p += sizeof(struct ip_vs_sync_v4);      /* options ptr */
+               s->v4.caddr = cp->caddr.ip;
+               s->v4.vaddr = cp->vaddr.ip;
+               s->v4.daddr = cp->daddr.ip;
+       }
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+               *(p++) = IPVS_OPT_SEQ_DATA;
+               *(p++) = sizeof(struct ip_vs_sync_conn_options);
+               hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+               p += sizeof(struct ip_vs_seq);
+               hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+               p += sizeof(struct ip_vs_seq);
+       }
+       /* Handle pe data */
+       if (cp->pe_data_len && cp->pe_data) {
+               *(p++) = IPVS_OPT_PE_DATA;
+               *(p++) = cp->pe_data_len;
+               memcpy(p, cp->pe_data, cp->pe_data_len);
+               p += cp->pe_data_len;
+               if (pe_name_len) {
+                       /* Add PE_NAME */
+                       *(p++) = IPVS_OPT_PE_NAME;
+                       *(p++) = pe_name_len;
+                       memcpy(p, cp->pe->name, pe_name_len);
+                       p += pe_name_len;
+               }
+       }
+
+       spin_unlock(&curr_sb_lock);
+
+control:
+       /* synchronize its controller if it has */
+       cp = cp->control;
+       if (!cp)
+               return;
+       /*
+        * Reduce sync rate for templates
+        * i.e only increment in_pkts for Templates.
+        */
+       if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+               int pkts = atomic_add_return(1, &cp->in_pkts);
+
+               if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
+                       return;
+       }
+       goto sloop;
+}
+
+/*
+ *  fill_param used by version 1
+ *
+ *  Fills @p from the addresses/ports of the received sync conn @sc (v4 or
+ *  v6 layout selected by @af) and, when @pe_data_len is non-zero, attaches
+ *  the persistence engine named by @pe_name plus a private copy of @pe_data.
+ *
+ *  Returns 0 on success, 1 when PE data comes without a name or the named
+ *  engine is not found, -ENOMEM when the pe_data copy cannot be allocated.
+ *  On success with PE data, p->pe_data is a kmalloc'ed buffer.
+ */
 static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
-                          const union nf_inet_addr *caddr, __be16 cport,
-                          const union nf_inet_addr *vaddr, __be16 vport,
-                          struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
+                          struct ip_vs_conn_param *p,
+                          __u8 *pe_data, unsigned int pe_data_len,
+                          __u8 *pe_name, unsigned int pe_name_len)
 {
-       /* XXX: Need to take into account persistence engine */
-       ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_conn_fill_param(af, sc->v6.protocol,
+                                     (const union nf_inet_addr *)&sc->v6.caddr,
+                                     sc->v6.cport,
+                                     (const union nf_inet_addr *)&sc->v6.vaddr,
+                                     sc->v6.vport, p);
+       else
+#endif
+               ip_vs_conn_fill_param(af, sc->v4.protocol,
+                                     (const union nf_inet_addr *)&sc->v4.caddr,
+                                     sc->v4.cport,
+                                     (const union nf_inet_addr *)&sc->v4.vaddr,
+                                     sc->v4.vport, p);
+       /* Handle pe data */
+       if (pe_data_len) {
+               if (pe_name_len) {
+                       /* pe_name is not NUL terminated on the wire; make a */
+                       /* terminated copy (assumes pe_name_len <=           */
+                       /* IP_VS_PENAME_MAXLEN - verify against the caller)  */
+                       char buff[IP_VS_PENAME_MAXLEN+1];
+
+                       memcpy(buff, pe_name, pe_name_len);
+                       buff[pe_name_len]=0;
+                       p->pe = __ip_vs_pe_getbyname(buff);
+                       if (!p->pe) {
+                               IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff);
+                               return 1;
+                       }
+               } else {
+                       IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+                       return 1;
+               }
+
+               p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+               if (!p->pe_data) {
+                       /* presumably drops the ref taken by the lookup above */
+                       if (p->pe->module)
+                               module_put(p->pe->module);
+                       return -ENOMEM;
+               }
+               memcpy(p->pe_data, pe_data, pe_data_len);
+               p->pe_data_len = pe_data_len;
+       }
        return 0;
 }
 
 /*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
+ *  Connection Add / Update.
+ *  Common for version 0 and 1 reception of backup sync_conns.
+ *  Param: ...
+ *         timeout is in sec.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct ip_vs_conn_param *param,  unsigned flags,
+                           unsigned state, unsigned protocol, unsigned type,
+                           const union nf_inet_addr *daddr, __be16 dport,
+                           unsigned long timeout, __u32 fwmark,
+                           struct ip_vs_sync_conn_options *opt,
+                           struct ip_vs_protocol *pp)
 {
-       struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-       struct ip_vs_sync_conn *s;
-       struct ip_vs_sync_conn_options *opt;
-       struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp;
        struct ip_vs_dest *dest;
-       struct ip_vs_conn_param param;
-       char *p;
-       int i;
+       struct ip_vs_conn *cp;
 
-       if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-               IP_VS_ERR_RL("sync message header too short\n");
-               return;
-       }
 
-       /* Convert size back to host byte order */
-       m->size = ntohs(m->size);
+       /* Look up an existing entry: templates use their own lookup */
+       if (!(flags & IP_VS_CONN_F_TEMPLATE))
+               cp = ip_vs_conn_in_get(param);
+       else
+               cp = ip_vs_ct_in_get(param);
 
-       if (buflen != m->size) {
-               IP_VS_ERR_RL("bogus sync message size\n");
-               return;
-       }
+       if (cp && param->pe_data)       /* Free pe_data */
+               kfree(param->pe_data);
+       if (!cp) {
+               /*
+                * Find the appropriate destination for the connection.
+                * If it is not found the connection will remain unbound
+                * but still handled.
+                */
+               dest = ip_vs_find_dest(type, daddr, dport, param->vaddr,
+                                      param->vport, protocol, fwmark);
 
-       /* SyncID sanity check */
-       if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-               IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-                         m->syncid);
-               return;
+               /*  Set the appropriate activity flag */
+               if (protocol == IPPROTO_TCP) {
+                       if (state != IP_VS_TCP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               } else if (protocol == IPPROTO_SCTP) {
+                       if (state != IP_VS_SCTP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+               cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+               if (!cp) {
+                       if (param->pe_data)
+                               kfree(param->pe_data);
+                       IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+                       return;
+               }
+       } else if (!cp->dest) {
+               dest = ip_vs_try_bind_dest(cp);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+               (cp->state != state)) {
+               /* update active/inactive flag for the connection */
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state != IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       cp->flags |= IP_VS_CONN_F_INACTIVE;
+               } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state == IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_inc(&dest->activeconns);
+                       atomic_dec(&dest->inactconns);
+                       cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+               (cp->state != state)) {
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+               (state != IP_VS_SCTP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       /*
+                        * The conn was just moved to the inactive counter,
+                        * so the flag has to be SET (as in the TCP branch);
+                        * clearing it leaves flag and counters out of step.
+                        */
+                       cp->flags |= IP_VS_CONN_F_INACTIVE;
+               }
        }
 
-       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+       /* opt starts with in_seq; copy the received seq data over the conn's */
+       if (opt)
+               memcpy(&cp->in_seq, opt, sizeof(*opt));
+       atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+       cp->state = state;
+       cp->old_state = cp->state;
+       /*
+        * For Ver 0 messages style
+        *  - Not possible to recover the right timeout for templates
+        *  - can not find the right fwmark
+        *    virtual service. If needed, we can do it for
+        *    non-fwmark persistent services.
+        * Ver 1 messages style.
+        *  - No problem.
+        */
+       if (timeout) {
+               /* timeout is in seconds; clamp so that timeout*HZ cannot overflow */
+               if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+                       timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+               cp->timeout = timeout*HZ;
+       } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+               cp->timeout = pp->timeout_table[state];
+       else
+               cp->timeout = (3*60*HZ);
+       ip_vs_conn_put(cp);
+}
+
+/*
+ *  Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
+{
+       struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+       struct ip_vs_sync_conn_v0 *s;
+       struct ip_vs_sync_conn_options *opt;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       char *p;
+       int i;
+
+       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
        for (i=0; i<m->nr_conns; i++) {
                unsigned flags, state;
 
                if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-                       IP_VS_ERR_RL("bogus conn in sync message\n");
+                       IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
                        return;
                }
-               s = (struct ip_vs_sync_conn *) p;
+               s = (struct ip_vs_sync_conn_v0 *) p;
                flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
                flags &= ~IP_VS_CONN_F_HASHED;
                if (flags & IP_VS_CONN_F_SEQ_MASK) {
                        opt = (struct ip_vs_sync_conn_options *)&s[1];
                        p += FULL_CONN_SIZE;
                        if (p > buffer+buflen) {
-                               IP_VS_ERR_RL("bogus conn options in sync message\n");
+                               IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
                                return;
                        }
                } else {
@@ -362,12 +868,12 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
                        pp = ip_vs_proto_get(s->protocol);
                        if (!pp) {
-                               IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
                                        s->protocol);
                                continue;
                        }
                        if (state >= pp->num_states) {
-                               IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
                                        pp->name, state);
                                continue;
                        }
@@ -375,105 +881,273 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                        /* protocol in templates is not used for state/timeout */
                        pp = NULL;
                        if (state > 0) {
-                               IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
                                        state);
                                state = 0;
                        }
                }
 
-               {
-                       if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
-                                             (union nf_inet_addr *)&s->caddr,
-                                             s->cport,
-                                             (union nf_inet_addr *)&s->vaddr,
-                                             s->vport, &param)) {
-                               pr_err("ip_vs_conn_fill_param_sync failed");
-                               return;
+               ip_vs_conn_fill_param(AF_INET, s->protocol,
+                                     (const union nf_inet_addr *)&s->caddr,
+                                     s->cport,
+                                     (const union nf_inet_addr *)&s->vaddr,
+                                     s->vport, &param);
+
+               /* Send timeout as Zero */
+               ip_vs_proc_conn(&param, flags, state, s->protocol, AF_INET,
+                               (union nf_inet_addr *)&s->daddr, s->dport,
+                               0, 0, opt, pp);
+       }
+}
+
+/*
+ * Parse a sequence-data option of a received sync conn.
+ *
+ * @p:         start of the option payload (network order, may be unaligned)
+ * @plen:      payload length taken from the option header
+ * @opt_flags: bitmask of options seen so far; IPVS_OPT_F_SEQ_DATA is set
+ *             on success
+ * @opt:       receives in_seq and out_seq converted to host order
+ *
+ * Returns 0 on success, -EINVAL when the length is wrong or the option has
+ * already been seen for this conn.
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+                                   __u32 *opt_flags,
+                                   struct ip_vs_sync_conn_options *opt)
+{
+       struct ip_vs_sync_conn_options *wire;
+
+       if (plen != sizeof(*wire)) {
+               IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+               return -EINVAL;
+       }
+
+       if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+               IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+               return -EINVAL;
+       }
+
+       /* Only interpret the payload once it is known to be well-formed */
+       wire = (struct ip_vs_sync_conn_options *)p;
+       ntoh_seq(&wire->in_seq, &opt->in_seq);
+       ntoh_seq(&wire->out_seq, &opt->out_seq);
+
+       *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+       return 0;
+}
+
+/*
+ * Record the location of a variable-length option payload.
+ *
+ * @p, @plen:  payload start and length taken from the option header
+ * @data_len:  receives @plen on success
+ * @data:      receives @p on success; the payload is NOT copied
+ * @maxlen:    largest payload length accepted
+ * @opt_flags: bitmask of options seen so far
+ * @flag:      bit identifying this option; set in @opt_flags on success
+ *
+ * Returns 0 on success, -EINVAL when the payload is longer than @maxlen or
+ * the option has already been seen.
+ */
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+                         __u8 **data, unsigned int maxlen,
+                         __u32 *opt_flags, __u32 flag)
+{
+       if (plen > maxlen) {
+               /* maxlen is unsigned, so print it with %u */
+               IP_VS_DBG(2, "BACKUP, bogus par.data len > %u\n", maxlen);
+               return -EINVAL;
+       }
+       if (*opt_flags & flag) {
+               IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+               return -EINVAL;
+       }
+       *data_len = plen;
+       *data = p;
+       *opt_flags |= flag;
+       return 0;
+}
+/*
+ *   Process a Version 1 sync. connection
+ *
+ *   @p:       start of one sync conn entry
+ *   @msg_end: first byte after this entry
+ *
+ *   Parses the fixed v4/v6 part and the type/length encoded options that
+ *   follow it, then hands the result to ip_vs_proc_conn().
+ *
+ *   Returns 0 on success, a negative code when the entry itself is
+ *   malformed (unknown type, truncated or invalid option), and a positive
+ *   code (logged at the out label) when only this entry is dropped.
+ *   NOTE(review): how the caller treats negative vs positive codes is not
+ *   visible here - confirm in ip_vs_process_message().
+ */
+static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
+{
+       struct ip_vs_sync_conn_options opt;
+       union  ip_vs_sync_conn *s;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       __u32 flags;
+       unsigned int af, state, pe_data_len=0, pe_name_len=0;
+       __u8 *pe_data=NULL, *pe_name=NULL;
+       __u32 opt_flags=0;
+       int retc=0;
+
+       s = (union ip_vs_sync_conn *) p;
+
+       /* Select the address family from the type field and skip the fixed part */
+       if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+               af = AF_INET6;
+               p += sizeof(struct ip_vs_sync_v6);
+#else
+               IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+               retc = 10;
+               goto out;
+#endif
+       } else if (!s->v4.type) {
+               af = AF_INET;
+               p += sizeof(struct ip_vs_sync_v4);
+       } else {
+               return -10;
+       }
+       /* The fixed part must fit inside this entry */
+       if (p > msg_end)
+               return -20;
+
+       /* Process optional params check Type & Len. */
+       while (p < msg_end) {
+               int ptype;
+               int plen;
+
+               /* Each option starts with a one byte type and a one byte length */
+               if (p+2 > msg_end)
+                       return -30;
+               ptype = *(p++);
+               plen  = *(p++);
+
+               /* Zero-length options and options running past the entry are invalid */
+               if (!plen || ((p + plen) > msg_end))
+                       return -40;
+               /* Handle seq option  p = param data */
+               switch (ptype & ~IPVS_OPT_F_PARAM) {
+               case IPVS_OPT_SEQ_DATA:
+                       if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+                               return -50;
+                       break;
+
+               case IPVS_OPT_PE_DATA:
+                       if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+                                          IP_VS_PEDATA_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_DATA))
+                               return -60;
+                       break;
+
+               case IPVS_OPT_PE_NAME:
+                       if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+                                          IP_VS_PENAME_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_NAME))
+                               return -70;
+                       break;
+
+               default:
+                       /* Param data mandatory ? */
+                       /* Unknown options with IPVS_OPT_F_PARAM set are skipped */
+                       if (!(ptype & IPVS_OPT_F_PARAM)) {
+                               IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+                                         ptype & ~IPVS_OPT_F_PARAM);
+                               retc = 20;
+                               goto out;
+                       }
+               }
+               p += plen;  /* Next option */
+       }
+
+       /* Get flags and Mask off unsupported */
+       flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+       flags |= IP_VS_CONN_F_SYNC;
+       state = ntohs(s->v4.state);
+
+       if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+               pp = ip_vs_proto_get(s->v4.protocol);
+               if (!pp) {
+                       IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+                               s->v4.protocol);
+                       retc = 30;
+                       goto out;
+               }
+               if (state >= pp->num_states) {
+                       IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+                               pp->name, state);
+                       retc = 40;
+                       goto out;
+               }
+       } else {
+               /* protocol in templates is not used for state/timeout */
+               pp = NULL;
+               if (state > 0) {
+                       IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+                               state);
+                       state = 0;
+               }
+       }
+       /* Any non-zero return (PE lookup failure or -ENOMEM) drops this entry */
+       if (ip_vs_conn_fill_param_sync(af, s, &param,
+                                       pe_data, pe_data_len,
+                                       pe_name, pe_name_len)) {
+               retc = 50;
+               goto out;
+       }
+       /* If only IPv4, just silent skip IPv6 */
+       if (af == AF_INET)
+               ip_vs_proc_conn(&param, flags, state, s->v4.protocol, af,
+                               (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+                               ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+                               pp);
+#ifdef CONFIG_IP_VS_IPV6
+       else
+               ip_vs_proc_conn(&param, flags, state, s->v6.protocol, af,
+                               (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+                               ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+                               pp);
+#endif
+       return 0;
+       /* Error exit */
+out:
+       IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+       return retc;
+
+}
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ *      Handles Version 0 & 1
+ */
+static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
+{
+       struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+       __u8 *p, *msg_end;
+       int i, nr_conns;
+
+       if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+               IP_VS_DBG(2, "BACKUP, message header too short\n");
+               return;
+       }
+       /* Convert size back to host byte order */
+       m2->size = ntohs(m2->size);
+
+       if (buflen != m2->size) {
+               IP_VS_DBG(2, "BACKUP, bogus message size\n");
+               return;
+       }
+       /* SyncID sanity check */
+       if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) {
+               IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+               return;
+       }
+       /* Handle version 1  message */
+       if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+           && (m2->spare == 0)) {
+
+               msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+               nr_conns = m2->nr_conns;
+
+               for (i=0; i<nr_conns; i++) {
+                       union ip_vs_sync_conn *s;
+                       unsigned size;
+                       int retc;
+
+                       p = msg_end;
+                       if (p + sizeof(s->v4) > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+                               return;
                        }
-                       cp = ip_vs_conn_new(&param,
-                                           (union nf_inet_addr *)&s->daddr,
-                                           s->dport, flags, dest);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-                       if (!cp) {
-                               pr_err("ip_vs_conn_new failed\n");
+                       s = (union ip_vs_sync_conn *)p;
+                       size = ntohs(s->v4.ver_size) & SVER_MASK;
+                       msg_end = p + size;
+                       /* Basic sanity checks */
+                       if (msg_end  > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
                                return;
                        }
-               } else if (!cp->dest) {
-                       dest = ip_vs_try_bind_dest(cp);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-                          (cp->state != state)) {
-                       /* update active/inactive flag for the connection */
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state != IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_dec(&dest->activeconns);
-                               atomic_inc(&dest->inactconns);
-                               cp->flags |= IP_VS_CONN_F_INACTIVE;
-                       } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state == IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_inc(&dest->activeconns);
-                               atomic_dec(&dest->inactconns);
-                               cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+                                             ntohs(s->v4.ver_size) >> SVER_SHIFT);
+                               return;
                        }
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-                          (cp->state != state)) {
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                            (state != IP_VS_SCTP_S_ESTABLISHED)) {
-                           atomic_dec(&dest->activeconns);
-                           atomic_inc(&dest->inactconns);
-                           cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       /* Process a single sync_conn */
+                       if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+                                            retc);
+                               return;
                        }
+                       /* Make sure we have 32 bit alignment */
+                       msg_end = p + ((size + 3) & ~3);
                }
-
-               if (opt)
-                       memcpy(&cp->in_seq, opt, sizeof(*opt));
-               atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-               cp->state = state;
-               cp->old_state = cp->state;
-               /*
-                * We can not recover the right timeout for templates
-                * in all cases, we can not find the right fwmark
-                * virtual service. If needed, we can do it for
-                * non-fwmark persistent services.
-                */
-               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-                       cp->timeout = pp->timeout_table[state];
-               else
-                       cp->timeout = (3*60*HZ);
-               ip_vs_conn_put(cp);
+       } else {
+               /* Old type of message */
+               ip_vs_process_message_v0(buffer, buflen);
+               return;
        }
 }
 
@@ -851,7 +1525,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 
        IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-                 sizeof(struct ip_vs_sync_conn));
+                 sizeof(struct ip_vs_sync_conn_v0));
 
        if (state == IP_VS_STATE_MASTER) {
                if (sync_master_thread)
index 5325a3fbe4ac8e8ab5a2e8175f9a5b93e1cc663c..1f2a4e35fb115bef5da5eba86df4500733c4213c 100644 (file)
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
                        .fl4_tos = RT_TOS(iph->tos),
                        .mark = skb->mark,
                };
-               struct rtable *rt;
 
                if (ip_route_output_key(net, &rt, &fl))
                        return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                ip_rt_put(rt);
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
                                 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        df |= (old_iph->frag_off & htons(IP_DF));
 
-       if ((old_iph->frag_off & htons(IP_DF))
-           && mtu < ntohs(old_iph->tot_len)) {
+       if ((old_iph->frag_off & htons(IP_DF) &&
+           mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
-       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+           !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
index e61511929c66c99b02286388a41e909d8e8fc6fc..e95ac42ef673ca5d0c0a9a3320f93e533869c090 100644 (file)
@@ -655,7 +655,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
         * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
         */
        memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-              sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+              offsetof(struct nf_conn, proto) -
+              offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
        spin_lock_init(&ct->lock);
        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
index a20fb0bd1efe850543a9dea62fdf4bf53330a335..4a9ed23180df090e41c5f181c68e6e32f4fa51e4 100644 (file)
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
-       atomic_inc(&exp->use);
+       /* Two references: one for the hash insert, one for the timer */
+       atomic_add(2, &exp->use);
 
        if (master_help) {
                hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        if (master_help) {
-               p = &master_help->helper->expect_policy[exp->class];
+               p = &rcu_dereference_protected(
+                               master_help->helper,
+                               lockdep_is_held(&nf_conntrack_lock)
+                               )->expect_policy[exp->class];
                exp->timeout.expires = jiffies + p->timeout * HZ;
        }
        add_timer(&exp->timeout);
 
-       atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
 }
 
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
        if (!del_timer(&i->timeout))
                return 0;
 
-       p = &master_help->helper->expect_policy[i->class];
+       p = &rcu_dereference_protected(
+               master_help->helper,
+               lockdep_is_held(&nf_conntrack_lock)
+               )->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
        }
        /* Will be over limit? */
        if (master_help) {
-               p = &master_help->helper->expect_policy[expect->class];
+               p = &rcu_dereference_protected(
+                       master_help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       )->expect_policy[expect->class];
                if (p->max_expected &&
                    master_help->expecting[expect->class] >= p->max_expected) {
                        evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
        struct hlist_node *n;
 
        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
 }
index bd82450c193f5dbb4895f4fc7565040686fa740d..80a23ed62bb0739c2d9d97f195dbea57216230b5 100644 (file)
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
        /* This assumes that extended areas in conntrack for the types
           whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
        for (i = min; i <= max; i++) {
-               t1 = nf_ct_ext_types[i];
+               t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                if (!t1)
                        continue;
 
-               t1->alloc_size = sizeof(struct nf_ct_ext)
-                                + ALIGN(sizeof(struct nf_ct_ext), t1->align)
-                                + t1->len;
+               t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+                                t1->len;
                for (j = 0; j < NF_CT_EXT_NUM; j++) {
-                       t2 = nf_ct_ext_types[j];
+                       t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                        if (t2 == NULL || t2 == t1 ||
                            (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
                                continue;
index 59e1a4cd4e8b8b115c77e788fe2b821d50f32f65..767bbe98a0f06c39234cc9fc31bfabe1b42575fb 100644 (file)
@@ -158,7 +158,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
        struct nf_conn_help *help = nfct_help(ct);
 
-       if (help && help->helper == me) {
+       if (help && rcu_dereference_protected(
+                       help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       ) == me) {
                nf_conntrack_event(IPCT_HELPER, ct);
                rcu_assign_pointer(help->helper, NULL);
        }
@@ -210,7 +213,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
                hlist_for_each_entry_safe(exp, n, next,
                                          &net->ct.expect_hash[i], hnode) {
                        struct nf_conn_help *help = nfct_help(exp->master);
-                       if ((help->helper == me || exp->helper == me) &&
+                       if ((rcu_dereference_protected(
+                                       help->helper,
+                                       lockdep_is_held(&nf_conntrack_lock)
+                                       ) == me || exp->helper == me) &&
                            del_timer(&exp->timeout)) {
                                nf_ct_unlink_expect(exp);
                                nf_ct_expect_put(exp);
index 0cdba50c0d69be7ad5ddfed136e1be9a20a6f338..9eabaa6f28a81e7bc8a3934112bd621d6e1074e8 100644 (file)
@@ -1378,6 +1378,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
        }
 #endif
 
+       memset(&ct->proto, 0, sizeof(ct->proto));
        if (cda[CTA_PROTOINFO]) {
                err = ctnetlink_change_protoinfo(ct, cda);
                if (err < 0)
index dc7bb74110df22818b42222450f0141068b79b6d..5701c8dd783c02df1ef6d055f473845754e37d30 100644 (file)
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
 int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
        int ret = 0;
+       struct nf_conntrack_l3proto *old;
 
        if (proto->l3proto >= AF_MAX)
                return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
                return -EINVAL;
 
        mutex_lock(&nf_ct_proto_mutex);
-       if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+       old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                       lockdep_is_held(&nf_ct_proto_mutex));
+       if (old != &nf_conntrack_l3proto_generic) {
                ret = -EBUSY;
                goto out_unlock;
        }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
        BUG_ON(proto->l3proto >= AF_MAX);
 
        mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+       BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                        lockdep_is_held(&nf_ct_proto_mutex)
+                                        ) != proto);
        rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
                           &nf_conntrack_l3proto_generic);
        nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
        mutex_lock(&nf_ct_proto_mutex);
        if (!nf_ct_protos[l4proto->l3proto]) {
                /* l3proto may be loaded latter. */
-               struct nf_conntrack_l4proto **proto_array;
+               struct nf_conntrack_l4proto __rcu **proto_array;
                int i;
 
                proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                }
 
                for (i = 0; i < MAX_NF_CT_PROTO; i++)
-                       proto_array[i] = &nf_conntrack_l4proto_generic;
+                       RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
 
                /* Before making proto_array visible to lockless readers,
                 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                smp_wmb();
 
                nf_ct_protos[l4proto->l3proto] = proto_array;
-       } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
-                                       &nf_conntrack_l4proto_generic) {
+       } else if (rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != &nf_conntrack_l4proto_generic) {
                ret = -EBUSY;
                goto out_unlock;
        }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
        BUG_ON(l4proto->l3proto >= PF_MAX);
 
        mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+       BUG_ON(rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != l4proto);
        rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
                           &nf_conntrack_l4proto_generic);
        nf_ct_l4proto_unregister_sysctl(l4proto);
index 5292560d6d4aedbfa33c1369a66fc2d72657df8f..9ae57c57c50eaf2b1cd20013a41d72d3acfdb584 100644 (file)
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
        ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
        ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
        ct->proto.dccp.state = CT_DCCP_NONE;
+       ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+       ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+       ct->proto.dccp.handshake_seq = 0;
        return true;
 
 out_invalid:
index c6049c2d5ea8d9be54aa6f1d77accafa3b2eb920..6f4ee70f460b028b3b45c7e8f2adbea47a108cbd 100644 (file)
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
            test_bit(SCTP_CID_COOKIE_ACK, map))
                return false;
 
+       memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
        new_state = SCTP_CONNTRACK_MAX;
        for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
                /* Don't need lock here: this conntrack not in circulation yet */
index 3fb2b73b24dc982629dca06073d82a60eb42d358..6f38d0e2ea4ac55241588f6c8a1704786433c42a 100644 (file)
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
        BUG_ON(th == NULL);
 
        /* Don't need lock here: this conntrack not in circulation yet */
-       new_state
-               = tcp_conntracks[0][get_conntrack_index(th)]
-               [TCP_CONNTRACK_NONE];
+       new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
 
        /* Invalid: delete conntrack */
        if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
        }
 
        if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                /* SYN packet */
                ct->proto.tcp.seen[0].td_end =
                        segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                        ct->proto.tcp.seen[0].td_end;
 
                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-               ct->proto.tcp.seen[1].flags = 0;
        } else if (nf_ct_tcp_loose == 0) {
                /* Don't try to pick up connections. */
                return false;
        } else {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                /*
                 * We are in the middle of a connection,
                 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                ct->proto.tcp.seen[0].td_maxend =
                        ct->proto.tcp.seen[0].td_end +
                        ct->proto.tcp.seen[0].td_maxwin;
-               ct->proto.tcp.seen[0].td_scale = 0;
 
                /* We assume SACK and liberal window checking to handle
                 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                                              IP_CT_TCP_FLAG_BE_LIBERAL;
        }
 
-       ct->proto.tcp.seen[1].td_end = 0;
-       ct->proto.tcp.seen[1].td_maxend = 0;
-       ct->proto.tcp.seen[1].td_maxwin = 0;
-       ct->proto.tcp.seen[1].td_scale = 0;
-
        /* tcp_packet will set them */
-       ct->proto.tcp.state = TCP_CONNTRACK_NONE;
        ct->proto.tcp.last_index = TCP_NONE_SET;
 
        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
index b4d7f0f24b27e9534a97851e7830714c8135a7bf..8257bf64359370ac45a49162e9c003388273245d 100644 (file)
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/rculist_nulls.h>
 
 MODULE_LICENSE("GPL");
 
@@ -56,7 +57,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
        for (st->bucket = 0;
             st->bucket < net->ct.htable_size;
             st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                if (!is_a_nulls(n))
                        return n;
        }
@@ -69,13 +70,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        if (++st->bucket >= net->ct.htable_size)
                                return NULL;
                }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                               hlist_nulls_first_rcu(
+                                       &net->ct.hash[st->bucket]));
        }
        return head;
 }
index b07393eab88e2fb86a21d7556f7ce532c807a172..20c775cff2a8023ce603fcf0ccb7a712fbd548bb 100644 (file)
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
        struct nf_logger *t;
        int ret;
 
-       logger = nf_loggers[*pos];
+       logger = rcu_dereference_protected(nf_loggers[*pos],
+                                          lockdep_is_held(&nf_log_mutex));
 
        if (!logger)
                ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
                mutex_unlock(&nf_log_mutex);
        } else {
                mutex_lock(&nf_log_mutex);
-               logger = nf_loggers[tindex];
+               logger = rcu_dereference_protected(nf_loggers[tindex],
+                                                  lockdep_is_held(&nf_log_mutex));
                if (!logger)
                        table->data = "NONE";
                else
index 74aebed5bd28bb5c0c924cec7d908615b82ffdd6..1876f74115618187521498ec5d91445569b3a623 100644 (file)
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
        int ret;
+       const struct nf_queue_handler *old;
 
        if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] == qh)
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old == qh)
                ret = -EEXIST;
-       else if (queue_handler[pf])
+       else if (old)
                ret = -EBUSY;
        else {
                rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
+       const struct nf_queue_handler *old;
+
        if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] && queue_handler[pf] != qh) {
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old && old != qh) {
                mutex_unlock(&queue_handler_mutex);
                return -EINVAL;
        }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 
        mutex_lock(&queue_handler_mutex);
        for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-               if (queue_handler[pf] == qh)
+               if (rcu_dereference_protected(
+                               queue_handler[pf],
+                               lockdep_is_held(&queue_handler_mutex)
+                               ) == qh)
                        rcu_assign_pointer(queue_handler[pf], NULL);
        }
        mutex_unlock(&queue_handler_mutex);
index 6a1572b0ab416a65425abc135cca9d93a4769911..91592da504b9928ee38f7eb84fdfd587f236a6b9 100644 (file)
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
        for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
                if (!hlist_empty(&instance_table[st->bucket]))
-                       return rcu_dereference_bh(instance_table[st->bucket].first);
+                       return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
        }
        return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-       h = rcu_dereference_bh(h->next);
+       h = rcu_dereference_bh(hlist_next_rcu(h));
        while (!h) {
                if (++st->bucket >= INSTANCE_BUCKETS)
                        return NULL;
 
-               h = rcu_dereference_bh(instance_table[st->bucket].first);
+               h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
        }
        return h;
 }
index c2c0e4abeb996fe689ebac8e964a9aa11163ce9f..af9c4dadf8165922af9ee23b02abc48047496a87 100644 (file)
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: Qdisc classification");
 MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
 
 static unsigned int
 classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
        return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg __read_mostly = {
-       .name       = "CLASSIFY",
-       .revision   = 0,
-       .family     = NFPROTO_UNSPEC,
-       .table      = "mangle",
-       .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-                     (1 << NF_INET_POST_ROUTING),
-       .target     = classify_tg,
-       .targetsize = sizeof(struct xt_classify_target_info),
-       .me         = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_UNSPEC,
+               .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+                             (1 << NF_INET_POST_ROUTING),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_ARP,
+               .hooks      = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
 };
 
 static int __init classify_tg_init(void)
 {
-       return xt_register_target(&classify_tg_reg);
+       return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 static void __exit classify_tg_exit(void)
 {
-       xt_unregister_target(&classify_tg_reg);
+       xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 module_init(classify_tg_init);
index 039cce1bde3dc60ccdbb3e6857bb2685dc123079..39627706aac677853fae0e2499f4668b493ef8c3 100644 (file)
@@ -72,10 +72,12 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
        if (info->queues_total > 1) {
                if (par->family == NFPROTO_IPV4)
-                       queue = hash_v4(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v4(skb) * info->queues_total) >>
+                                32) + queue;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
                else if (par->family == NFPROTO_IPV6)
-                       queue = hash_v6(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v6(skb) * info->queues_total) >>
+                                32) + queue;
 #endif
        }
        return NF_QUEUE_NR(queue);