git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
author Linus Torvalds <torvalds@g5.osdl.org>
Thu, 10 Nov 2005 03:32:25 +0000 (19:32 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Thu, 10 Nov 2005 03:32:25 +0000 (19:32 -0800)
69 files changed:
include/linux/genetlink.h [new file with mode: 0644]
include/linux/netfilter/nf_conntrack_common.h [new file with mode: 0644]
include/linux/netfilter/nf_conntrack_ftp.h [new file with mode: 0644]
include/linux/netfilter/nf_conntrack_sctp.h [new file with mode: 0644]
include/linux/netfilter/nf_conntrack_tcp.h [new file with mode: 0644]
include/linux/netfilter/nf_conntrack_tuple_common.h [new file with mode: 0644]
include/linux/netfilter_ipv4/ip_conntrack.h
include/linux/netfilter_ipv4/ip_conntrack_ftp.h
include/linux/netfilter_ipv4/ip_conntrack_icmp.h
include/linux/netfilter_ipv4/ip_conntrack_sctp.h
include/linux/netfilter_ipv4/ip_conntrack_tcp.h
include/linux/netfilter_ipv4/ip_conntrack_tuple.h
include/linux/netfilter_ipv6.h
include/linux/netlink.h
include/linux/skbuff.h
include/linux/sysctl.h
include/net/genetlink.h [new file with mode: 0644]
include/net/netfilter/ipv4/nf_conntrack_icmp.h [new file with mode: 0644]
include/net/netfilter/ipv4/nf_conntrack_ipv4.h [new file with mode: 0644]
include/net/netfilter/ipv6/nf_conntrack_icmpv6.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_compat.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_core.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_helper.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_l3proto.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_protocol.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack_tuple.h [new file with mode: 0644]
include/net/netlink.h [new file with mode: 0644]
net/core/rtnetlink.c
net/core/skbuff.c
net/ipv4/inet_diag.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/ip_conntrack_netlink.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_CONNMARK.c
net/ipv4/netfilter/ipt_NOTRACK.c
net/ipv4/netfilter/ipt_connbytes.c
net/ipv4/netfilter/ipt_connmark.c
net/ipv4/netfilter/ipt_conntrack.c
net/ipv4/netfilter/ipt_helper.c
net/ipv4/netfilter/ipt_state.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nf_conntrack_proto_icmp.c [new file with mode: 0644]
net/ipv6/ip6_input.c
net/ipv6/ip6_output.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/ip6t_MARK.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c [new file with mode: 0644]
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c [new file with mode: 0644]
net/ipv6/netfilter/nf_conntrack_reasm.c [new file with mode: 0644]
net/ipv6/raw.c
net/ipv6/route.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_conntrack_core.c [new file with mode: 0644]
net/netfilter/nf_conntrack_ftp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_l3proto_generic.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_generic.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_sctp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_tcp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_udp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_standalone.c [new file with mode: 0644]
net/netlink/Makefile
net/netlink/af_netlink.c
net/netlink/attr.c [new file with mode: 0644]
net/netlink/genetlink.c [new file with mode: 0644]
net/xfrm/xfrm_user.c

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
new file mode 100644 (file)
index 0000000..84f12a4
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef __LINUX_GENERIC_NETLINK_H
+#define __LINUX_GENERIC_NETLINK_H
+
+#include <linux/netlink.h>
+
+#define GENL_NAMSIZ    16      /* length of family name */
+
+#define GENL_MIN_ID    NLMSG_MIN_TYPE
+#define GENL_MAX_ID    1023
+
+struct genlmsghdr {
+       __u8    cmd;
+       __u8    version;
+       __u16   reserved;
+};
+
+#define GENL_HDRLEN    NLMSG_ALIGN(sizeof(struct genlmsghdr))
+
+/*
+ * List of reserved static generic netlink identifiers:
+ */
+#define GENL_ID_GENERATE       0
+#define GENL_ID_CTRL           NLMSG_MIN_TYPE
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+enum {
+       CTRL_CMD_UNSPEC,
+       CTRL_CMD_NEWFAMILY,
+       CTRL_CMD_DELFAMILY,
+       CTRL_CMD_GETFAMILY,
+       CTRL_CMD_NEWOPS,
+       CTRL_CMD_DELOPS,
+       CTRL_CMD_GETOPS,
+       __CTRL_CMD_MAX,
+};
+
+#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
+
+enum {
+       CTRL_ATTR_UNSPEC,
+       CTRL_ATTR_FAMILY_ID,
+       CTRL_ATTR_FAMILY_NAME,
+       __CTRL_ATTR_MAX,
+};
+
+#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
+
+#endif /* __LINUX_GENERIC_NETLINK_H */
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
new file mode 100644 (file)
index 0000000..6d39b51
--- /dev/null
@@ -0,0 +1,159 @@
+#ifndef _NF_CONNTRACK_COMMON_H
+#define _NF_CONNTRACK_COMMON_H
+/* Connection state tracking for netfilter.  This is separated from,
+   but required by, the NAT layer; it can also be used by an iptables
+   extension. */
+enum ip_conntrack_info
+{
+       /* Part of an established connection (either direction). */
+       IP_CT_ESTABLISHED,
+
+       /* Like NEW, but related to an existing connection, or ICMP error
+          (in either direction). */
+       IP_CT_RELATED,
+
+       /* Started a new connection to track (only
+           IP_CT_DIR_ORIGINAL); may be a retransmission. */
+       IP_CT_NEW,
+
+       /* >= this indicates reply direction */
+       IP_CT_IS_REPLY,
+
+       /* Number of distinct IP_CT types (no NEW in reply dirn). */
+       IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
+};
+
+/* Bitset representing status of connection. */
+enum ip_conntrack_status {
+       /* It's an expected connection: bit 0 set.  This bit never changed */
+       IPS_EXPECTED_BIT = 0,
+       IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
+
+       /* We've seen packets both ways: bit 1 set.  Can be set, not unset. */
+       IPS_SEEN_REPLY_BIT = 1,
+       IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
+
+       /* Conntrack should never be early-expired. */
+       IPS_ASSURED_BIT = 2,
+       IPS_ASSURED = (1 << IPS_ASSURED_BIT),
+
+       /* Connection is confirmed: originating packet has left box */
+       IPS_CONFIRMED_BIT = 3,
+       IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
+
+       /* Connection needs src nat in orig dir.  This bit never changed. */
+       IPS_SRC_NAT_BIT = 4,
+       IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
+
+       /* Connection needs dst nat in orig dir.  This bit never changed. */
+       IPS_DST_NAT_BIT = 5,
+       IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
+
+       /* Both together. */
+       IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
+
+       /* Connection needs TCP sequence adjusted. */
+       IPS_SEQ_ADJUST_BIT = 6,
+       IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
+
+       /* NAT initialization bits. */
+       IPS_SRC_NAT_DONE_BIT = 7,
+       IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
+
+       IPS_DST_NAT_DONE_BIT = 8,
+       IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
+
+       /* Both together */
+       IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
+
+       /* Connection is dying (removed from lists), can not be unset. */
+       IPS_DYING_BIT = 9,
+       IPS_DYING = (1 << IPS_DYING_BIT),
+};
+
+/* Connection tracking event bits */
+enum ip_conntrack_events
+{
+       /* New conntrack */
+       IPCT_NEW_BIT = 0,
+       IPCT_NEW = (1 << IPCT_NEW_BIT),
+
+       /* Expected connection */
+       IPCT_RELATED_BIT = 1,
+       IPCT_RELATED = (1 << IPCT_RELATED_BIT),
+
+       /* Destroyed conntrack */
+       IPCT_DESTROY_BIT = 2,
+       IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
+
+       /* Timer has been refreshed */
+       IPCT_REFRESH_BIT = 3,
+       IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
+
+       /* Status has changed */
+       IPCT_STATUS_BIT = 4,
+       IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+
+       /* Update of protocol info */
+       IPCT_PROTOINFO_BIT = 5,
+       IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+
+       /* Volatile protocol info */
+       IPCT_PROTOINFO_VOLATILE_BIT = 6,
+       IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
+
+       /* New helper for conntrack */
+       IPCT_HELPER_BIT = 7,
+       IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+
+       /* Update of helper info */
+       IPCT_HELPINFO_BIT = 8,
+       IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
+
+       /* Volatile helper info */
+       IPCT_HELPINFO_VOLATILE_BIT = 9,
+       IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
+
+       /* NAT info */
+       IPCT_NATINFO_BIT = 10,
+       IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
+
+       /* Counter highest bit has been set */
+       IPCT_COUNTER_FILLING_BIT = 11,
+       IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
+};
+
+enum ip_conntrack_expect_events {
+       IPEXP_NEW_BIT = 0,
+       IPEXP_NEW = (1 << IPEXP_NEW_BIT),
+};
+
+#ifdef __KERNEL__
+struct ip_conntrack_counter
+{
+       u_int32_t packets;
+       u_int32_t bytes;
+};
+
+struct ip_conntrack_stat
+{
+       unsigned int searched;
+       unsigned int found;
+       unsigned int new;
+       unsigned int invalid;
+       unsigned int ignore;
+       unsigned int delete;
+       unsigned int delete_list;
+       unsigned int insert;
+       unsigned int insert_failed;
+       unsigned int drop;
+       unsigned int early_drop;
+       unsigned int error;
+       unsigned int expect_new;
+       unsigned int expect_create;
+       unsigned int expect_delete;
+};
+
+#endif /* __KERNEL__ */
+
+#endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
new file mode 100644 (file)
index 0000000..ad4a41c
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef _NF_CONNTRACK_FTP_H
+#define _NF_CONNTRACK_FTP_H
+/* FTP tracking. */
+
+/* This enum is exposed to userspace */
+enum ip_ct_ftp_type
+{
+       /* PORT command from client */
+       IP_CT_FTP_PORT,
+       /* PASV response from server */
+       IP_CT_FTP_PASV,
+       /* EPRT command from client */
+       IP_CT_FTP_EPRT,
+       /* EPSV response from server */
+       IP_CT_FTP_EPSV,
+};
+
+#ifdef __KERNEL__
+
+#define FTP_PORT       21
+
+#define NUM_SEQ_TO_REMEMBER 2
+/* This structure exists only once per master */
+struct ip_ct_ftp_master {
+       /* Valid seq positions for cmd matching after newline */
+       u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
+       /* 0 means seq_aft_nl not set */
+       int seq_aft_nl_num[IP_CT_DIR_MAX];
+};
+
+struct ip_conntrack_expect;
+
+/* For NAT to hook in when we find a packet which describes what other
+ * connection we should expect. */
+extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+                                      enum ip_conntrack_info ctinfo,
+                                      enum ip_ct_ftp_type type,
+                                      unsigned int matchoff,
+                                      unsigned int matchlen,
+                                      struct ip_conntrack_expect *exp,
+                                      u32 *seq);
+#endif /* __KERNEL__ */
+
+#endif /* _NF_CONNTRACK_FTP_H */
diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h
new file mode 100644 (file)
index 0000000..b8994d9
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _NF_CONNTRACK_SCTP_H
+#define _NF_CONNTRACK_SCTP_H
+/* SCTP tracking. */
+
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
+enum sctp_conntrack {
+       SCTP_CONNTRACK_NONE,
+       SCTP_CONNTRACK_CLOSED,
+       SCTP_CONNTRACK_COOKIE_WAIT,
+       SCTP_CONNTRACK_COOKIE_ECHOED,
+       SCTP_CONNTRACK_ESTABLISHED,
+       SCTP_CONNTRACK_SHUTDOWN_SENT,
+       SCTP_CONNTRACK_SHUTDOWN_RECD,
+       SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
+       SCTP_CONNTRACK_MAX
+};
+
+struct ip_ct_sctp
+{
+       enum sctp_conntrack state;
+
+       u_int32_t vtag[IP_CT_DIR_MAX];
+       u_int32_t ttag[IP_CT_DIR_MAX];
+};
+
+#endif /* _NF_CONNTRACK_SCTP_H */
diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
new file mode 100644 (file)
index 0000000..b2feeff
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef _NF_CONNTRACK_TCP_H
+#define _NF_CONNTRACK_TCP_H
+/* TCP tracking. */
+
+/* This is exposed to userspace (ctnetlink) */
+enum tcp_conntrack {
+       TCP_CONNTRACK_NONE,
+       TCP_CONNTRACK_SYN_SENT,
+       TCP_CONNTRACK_SYN_RECV,
+       TCP_CONNTRACK_ESTABLISHED,
+       TCP_CONNTRACK_FIN_WAIT,
+       TCP_CONNTRACK_CLOSE_WAIT,
+       TCP_CONNTRACK_LAST_ACK,
+       TCP_CONNTRACK_TIME_WAIT,
+       TCP_CONNTRACK_CLOSE,
+       TCP_CONNTRACK_LISTEN,
+       TCP_CONNTRACK_MAX,
+       TCP_CONNTRACK_IGNORE
+};
+
+/* Window scaling is advertised by the sender */
+#define IP_CT_TCP_FLAG_WINDOW_SCALE            0x01
+
+/* SACK is permitted by the sender */
+#define IP_CT_TCP_FLAG_SACK_PERM               0x02
+
+/* This sender sent FIN first (own bit: 0x03 would alias WINDOW_SCALE|SACK_PERM) */
+#define IP_CT_TCP_FLAG_CLOSE_INIT              0x04
+
+#ifdef __KERNEL__
+
+struct ip_ct_tcp_state {
+       u_int32_t       td_end;         /* max of seq + len */
+       u_int32_t       td_maxend;      /* max of ack + max(win, 1) */
+       u_int32_t       td_maxwin;      /* max(win) */
+       u_int8_t        td_scale;       /* window scale factor */
+       u_int8_t        loose;          /* used when connection picked up from the middle */
+       u_int8_t        flags;          /* per direction options */
+};
+
+struct ip_ct_tcp
+{
+       struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
+       u_int8_t        state;          /* state of the connection (enum tcp_conntrack) */
+       /* For detecting stale connections */
+       u_int8_t        last_dir;       /* Direction of the last packet (enum ip_conntrack_dir) */
+       u_int8_t        retrans;        /* Number of retransmitted packets */
+       u_int8_t        last_index;     /* Index of the last packet */
+       u_int32_t       last_seq;       /* Last sequence number seen in dir */
+       u_int32_t       last_ack;       /* Last sequence number seen in opposite dir */
+       u_int32_t       last_end;       /* Last seq + len */
+};
+
+#endif /* __KERNEL__ */
+
+#endif /* _NF_CONNTRACK_TCP_H */
diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h
new file mode 100644 (file)
index 0000000..8e145f0
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _NF_CONNTRACK_TUPLE_COMMON_H
+#define _NF_CONNTRACK_TUPLE_COMMON_H
+
+enum ip_conntrack_dir
+{
+       IP_CT_DIR_ORIGINAL,
+       IP_CT_DIR_REPLY,
+       IP_CT_DIR_MAX
+};
+
+#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
+
+#endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
index d078bb91d9e5d4b6cf165b6689b5268a0eac7d2d..b3432ab59a175d23304f7dfb5739197b0168f739 100644 (file)
@@ -1,132 +1,7 @@
 #ifndef _IP_CONNTRACK_H
 #define _IP_CONNTRACK_H
-/* Connection state tracking for netfilter.  This is separated from,
-   but required by, the NAT layer; it can also be used by an iptables
-   extension. */
-enum ip_conntrack_info
-{
-       /* Part of an established connection (either direction). */
-       IP_CT_ESTABLISHED,
-
-       /* Like NEW, but related to an existing connection, or ICMP error
-          (in either direction). */
-       IP_CT_RELATED,
-
-       /* Started a new connection to track (only
-           IP_CT_DIR_ORIGINAL); may be a retransmission. */
-       IP_CT_NEW,
-
-       /* >= this indicates reply direction */
-       IP_CT_IS_REPLY,
-
-       /* Number of distinct IP_CT types (no NEW in reply dirn). */
-       IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
-};
-
-/* Bitset representing status of connection. */
-enum ip_conntrack_status {
-       /* It's an expected connection: bit 0 set.  This bit never changed */
-       IPS_EXPECTED_BIT = 0,
-       IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
-
-       /* We've seen packets both ways: bit 1 set.  Can be set, not unset. */
-       IPS_SEEN_REPLY_BIT = 1,
-       IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
-
-       /* Conntrack should never be early-expired. */
-       IPS_ASSURED_BIT = 2,
-       IPS_ASSURED = (1 << IPS_ASSURED_BIT),
-
-       /* Connection is confirmed: originating packet has left box */
-       IPS_CONFIRMED_BIT = 3,
-       IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
-
-       /* Connection needs src nat in orig dir.  This bit never changed. */
-       IPS_SRC_NAT_BIT = 4,
-       IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
-
-       /* Connection needs dst nat in orig dir.  This bit never changed. */
-       IPS_DST_NAT_BIT = 5,
-       IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
-
-       /* Both together. */
-       IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
-
-       /* Connection needs TCP sequence adjusted. */
-       IPS_SEQ_ADJUST_BIT = 6,
-       IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
-
-       /* NAT initialization bits. */
-       IPS_SRC_NAT_DONE_BIT = 7,
-       IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
-
-       IPS_DST_NAT_DONE_BIT = 8,
-       IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
-
-       /* Both together */
-       IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
-
-       /* Connection is dying (removed from lists), can not be unset. */
-       IPS_DYING_BIT = 9,
-       IPS_DYING = (1 << IPS_DYING_BIT),
-};
-
-/* Connection tracking event bits */
-enum ip_conntrack_events
-{
-       /* New conntrack */
-       IPCT_NEW_BIT = 0,
-       IPCT_NEW = (1 << IPCT_NEW_BIT),
-
-       /* Expected connection */
-       IPCT_RELATED_BIT = 1,
-       IPCT_RELATED = (1 << IPCT_RELATED_BIT),
-
-       /* Destroyed conntrack */
-       IPCT_DESTROY_BIT = 2,
-       IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
-
-       /* Timer has been refreshed */
-       IPCT_REFRESH_BIT = 3,
-       IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
-
-       /* Status has changed */
-       IPCT_STATUS_BIT = 4,
-       IPCT_STATUS = (1 << IPCT_STATUS_BIT),
-
-       /* Update of protocol info */
-       IPCT_PROTOINFO_BIT = 5,
-       IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
-
-       /* Volatile protocol info */
-       IPCT_PROTOINFO_VOLATILE_BIT = 6,
-       IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
-
-       /* New helper for conntrack */
-       IPCT_HELPER_BIT = 7,
-       IPCT_HELPER = (1 << IPCT_HELPER_BIT),
-
-       /* Update of helper info */
-       IPCT_HELPINFO_BIT = 8,
-       IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
-
-       /* Volatile helper info */
-       IPCT_HELPINFO_VOLATILE_BIT = 9,
-       IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
 
-       /* NAT info */
-       IPCT_NATINFO_BIT = 10,
-       IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
-
-       /* Counter highest bit has been set */
-       IPCT_COUNTER_FILLING_BIT = 11,
-       IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
-};
-
-enum ip_conntrack_expect_events {
-       IPEXP_NEW_BIT = 0,
-       IPEXP_NEW = (1 << IPEXP_NEW_BIT),
-};
+#include <linux/netfilter/nf_conntrack_common.h>
 
 #ifdef __KERNEL__
 #include <linux/config.h>
@@ -194,12 +69,6 @@ do {                                                                        \
 #define IP_NF_ASSERT(x)
 #endif
 
-struct ip_conntrack_counter
-{
-       u_int32_t packets;
-       u_int32_t bytes;
-};
-
 struct ip_conntrack_helper;
 
 struct ip_conntrack
@@ -426,25 +295,6 @@ static inline int is_dying(struct ip_conntrack *ct)
 
 extern unsigned int ip_conntrack_htable_size;
  
-struct ip_conntrack_stat
-{
-       unsigned int searched;
-       unsigned int found;
-       unsigned int new;
-       unsigned int invalid;
-       unsigned int ignore;
-       unsigned int delete;
-       unsigned int delete_list;
-       unsigned int insert;
-       unsigned int insert_failed;
-       unsigned int drop;
-       unsigned int early_drop;
-       unsigned int error;
-       unsigned int expect_new;
-       unsigned int expect_create;
-       unsigned int expect_delete;
-};
-
 #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
 
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
index 5f06429b9047dd0d6d5f802e466468712eb08105..63811934de4d74cb21bf75d64398e95042e74008 100644 (file)
@@ -1,43 +1,6 @@
 #ifndef _IP_CONNTRACK_FTP_H
 #define _IP_CONNTRACK_FTP_H
-/* FTP tracking. */
 
-#ifdef __KERNEL__
+#include <linux/netfilter/nf_conntrack_ftp.h>
 
-#define FTP_PORT       21
-
-#endif /* __KERNEL__ */
-
-enum ip_ct_ftp_type
-{
-       /* PORT command from client */
-       IP_CT_FTP_PORT,
-       /* PASV response from server */
-       IP_CT_FTP_PASV,
-       /* EPRT command from client */
-       IP_CT_FTP_EPRT,
-       /* EPSV response from server */
-       IP_CT_FTP_EPSV,
-};
-
-#define NUM_SEQ_TO_REMEMBER 2
-/* This structure exists only once per master */
-struct ip_ct_ftp_master {
-       /* Valid seq positions for cmd matching after newline */
-       u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
-       /* 0 means seq_match_aft_nl not set */
-       int seq_aft_nl_num[IP_CT_DIR_MAX];
-};
-
-struct ip_conntrack_expect;
-
-/* For NAT to hook in when we find a packet which describes what other
- * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-                                      enum ip_conntrack_info ctinfo,
-                                      enum ip_ct_ftp_type type,
-                                      unsigned int matchoff,
-                                      unsigned int matchlen,
-                                      struct ip_conntrack_expect *exp,
-                                      u32 *seq);
 #endif /* _IP_CONNTRACK_FTP_H */
index f1664abbe39277d4d21bbade6a4bbcbcd1233b8c..eed5ee3e47442c0c04c8200d6d8a8a832e4e40e7 100644 (file)
@@ -1,11 +1,6 @@
 #ifndef _IP_CONNTRACK_ICMP_H
 #define _IP_CONNTRACK_ICMP_H
-/* ICMP tracking. */
-#include <asm/atomic.h>
 
-struct ip_ct_icmp
-{
-       /* Optimization: when number in == number out, forget immediately. */
-       atomic_t count;
-};
+#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
+
 #endif /* _IP_CONNTRACK_ICMP_H */
index 7a8d869321f70d4de7ced5ea534f42b554541520..4099a041a32ab073db409fec574259f4cb0668c3 100644 (file)
@@ -1,25 +1,6 @@
 #ifndef _IP_CONNTRACK_SCTP_H
 #define _IP_CONNTRACK_SCTP_H
-/* SCTP tracking. */
 
-enum sctp_conntrack {
-       SCTP_CONNTRACK_NONE,
-       SCTP_CONNTRACK_CLOSED,
-       SCTP_CONNTRACK_COOKIE_WAIT,
-       SCTP_CONNTRACK_COOKIE_ECHOED,
-       SCTP_CONNTRACK_ESTABLISHED,
-       SCTP_CONNTRACK_SHUTDOWN_SENT,
-       SCTP_CONNTRACK_SHUTDOWN_RECD,
-       SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
-       SCTP_CONNTRACK_MAX
-};
-
-struct ip_ct_sctp
-{
-       enum sctp_conntrack state;
-
-       u_int32_t vtag[IP_CT_DIR_MAX];
-       u_int32_t ttag[IP_CT_DIR_MAX];
-};
+#include <linux/netfilter/nf_conntrack_sctp.h>
 
 #endif /* _IP_CONNTRACK_SCTP_H */
index 16da044d97a77d46400fbb225d31e67a2592ef02..876b8fb17e68a829cd8324796fb58a71e5adcd34 100644 (file)
@@ -1,51 +1,6 @@
 #ifndef _IP_CONNTRACK_TCP_H
 #define _IP_CONNTRACK_TCP_H
-/* TCP tracking. */
 
-enum tcp_conntrack {
-       TCP_CONNTRACK_NONE,
-       TCP_CONNTRACK_SYN_SENT,
-       TCP_CONNTRACK_SYN_RECV,
-       TCP_CONNTRACK_ESTABLISHED,
-       TCP_CONNTRACK_FIN_WAIT,
-       TCP_CONNTRACK_CLOSE_WAIT,
-       TCP_CONNTRACK_LAST_ACK,
-       TCP_CONNTRACK_TIME_WAIT,
-       TCP_CONNTRACK_CLOSE,
-       TCP_CONNTRACK_LISTEN,
-       TCP_CONNTRACK_MAX,
-       TCP_CONNTRACK_IGNORE
-};
-
-/* Window scaling is advertised by the sender */
-#define IP_CT_TCP_FLAG_WINDOW_SCALE            0x01
-
-/* SACK is permitted by the sender */
-#define IP_CT_TCP_FLAG_SACK_PERM               0x02
-
-/* This sender sent FIN first */
-#define IP_CT_TCP_FLAG_CLOSE_INIT              0x03
-
-struct ip_ct_tcp_state {
-       u_int32_t       td_end;         /* max of seq + len */
-       u_int32_t       td_maxend;      /* max of ack + max(win, 1) */
-       u_int32_t       td_maxwin;      /* max(win) */
-       u_int8_t        td_scale;       /* window scale factor */
-       u_int8_t        loose;          /* used when connection picked up from the middle */
-       u_int8_t        flags;          /* per direction options */
-};
-
-struct ip_ct_tcp
-{
-       struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
-       u_int8_t        state;          /* state of the connection (enum tcp_conntrack) */
-       /* For detecting stale connections */
-       u_int8_t        last_dir;       /* Direction of the last packet (enum ip_conntrack_dir) */
-       u_int8_t        retrans;        /* Number of retransmitted packets */
-       u_int8_t        last_index;     /* Index of the last packet */
-       u_int32_t       last_seq;       /* Last sequence number seen in dir */
-       u_int32_t       last_ack;       /* Last sequence number seen in opposite dir */
-       u_int32_t       last_end;       /* Last seq + len */
-};
+#include <linux/netfilter/nf_conntrack_tcp.h>
 
 #endif /* _IP_CONNTRACK_TCP_H */
index 3232db11a4e54b6894ac35cbe669d1182629e252..2fdabdb4c0ef5d0395cab5dc55d75eff1b378dd1 100644 (file)
@@ -2,6 +2,7 @@
 #define _IP_CONNTRACK_TUPLE_H
 
 #include <linux/types.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
 
 /* A `tuple' is a structure containing the information to uniquely
   identify a connection.  ie. if two packets have the same tuple, they
@@ -88,13 +89,6 @@ struct ip_conntrack_tuple
                (tuple)->dst.u.all = 0;                         \
        } while (0)
 
-enum ip_conntrack_dir
-{
-       IP_CT_DIR_ORIGINAL,
-       IP_CT_DIR_REPLY,
-       IP_CT_DIR_MAX
-};
-
 #ifdef __KERNEL__
 
 #define DUMP_TUPLE(tp)                                         \
@@ -103,8 +97,6 @@ DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n",  \
        NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all),          \
        NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
 
-#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
-
 /* If we're the first tuple, it's the original dir. */
 #define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
 
index edcc2c6eb5c702ce8f39c156b685023fbbe34025..53b2983f6278f5a9008074cf53f30e4333e6f15a 100644 (file)
@@ -59,6 +59,7 @@
 
 enum nf_ip6_hook_priorities {
        NF_IP6_PRI_FIRST = INT_MIN,
+       NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
        NF_IP6_PRI_SELINUX_FIRST = -225,
        NF_IP6_PRI_CONNTRACK = -200,
        NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
index ba25ca874c20c1d646abf1d95c0e19da2fcc0f96..6a2ccf78a3564025947dc5f06236155d67351677 100644 (file)
@@ -71,7 +71,8 @@ struct nlmsghdr
 
 #define NLMSG_ALIGNTO  4
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
-#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_HDRLEN    ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
 #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
 #define NLMSG_DATA(nlh)  ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
 #define NLMSG_NEXT(nlh,len)     ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
@@ -86,6 +87,8 @@ struct nlmsghdr
 #define NLMSG_DONE             0x3     /* End of a dump        */
 #define NLMSG_OVERRUN          0x4     /* Data lost            */
 
+#define NLMSG_MIN_TYPE         0x10    /* < 0x10: reserved control messages */
+
 struct nlmsgerr
 {
        int             error;
@@ -108,6 +111,25 @@ enum {
        NETLINK_CONNECTED,
 };
 
+/*
+ *  <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * |        Header       | Pad |     Payload       | Pad |
+ * |   (struct nlattr)   | ing |                   | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ *  <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr
+{
+       __u16           nla_len;
+       __u16           nla_type;
+};
+
+#define NLA_ALIGNTO            4
+#define NLA_ALIGN(len)         (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN             ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
 #ifdef __KERNEL__
 
 #include <linux/capability.h>
index fdfb8fe8c38ce0a6f38f342558b1858a5f6b5f60..83010231db99289ad3f70935f720a723f401ae3a 100644 (file)
@@ -274,6 +274,9 @@ struct sk_buff {
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        __u8                    ipvs_property:1;
 #endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       struct sk_buff          *nfct_reasm;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        struct nf_bridge_info   *nf_bridge;
 #endif
@@ -1313,10 +1316,26 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
        if (nfct)
                atomic_inc(&nfct->use);
 }
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               atomic_inc(&skb->users);
+}
+static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               kfree_skb(skb);
+}
+#endif
 static inline void nf_reset(struct sk_buff *skb)
 {
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+       skb->nfct_reasm = NULL;
+#endif
 }
 
 #ifdef CONFIG_BRIDGE_NETFILTER
index fc131d6602b989a289f0bd9ff3f4c4f1aa987c71..22cf5e1ac9875c8b02bade643b8f9dedf0e7ba3d 100644 (file)
@@ -205,6 +205,7 @@ enum
        NET_ECONET=16,
        NET_SCTP=17,
        NET_LLC=18,
+       NET_NETFILTER=19,
 };
 
 /* /proc/sys/kernel/random */
@@ -270,6 +271,42 @@ enum
        NET_UNIX_MAX_DGRAM_QLEN=3,
 };
 
+/* /proc/sys/net/netfilter */
+enum
+{
+       NET_NF_CONNTRACK_MAX=1,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
+       NET_NF_CONNTRACK_UDP_TIMEOUT=10,
+       NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
+       NET_NF_CONNTRACK_ICMP_TIMEOUT=12,
+       NET_NF_CONNTRACK_GENERIC_TIMEOUT=13,
+       NET_NF_CONNTRACK_BUCKETS=14,
+       NET_NF_CONNTRACK_LOG_INVALID=15,
+       NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
+       NET_NF_CONNTRACK_TCP_LOOSE=17,
+       NET_NF_CONNTRACK_TCP_BE_LIBERAL=18,
+       NET_NF_CONNTRACK_TCP_MAX_RETRANS=19,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
+       NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
+       NET_NF_CONNTRACK_COUNT=27,
+       NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28,
+       NET_NF_CONNTRACK_FRAG6_TIMEOUT=29,
+       NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30,
+       NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31,
+};
+
 /* /proc/sys/net/ipv4 */
 enum
 {
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
new file mode 100644 (file)
index 0000000..52d8b1a
--- /dev/null
@@ -0,0 +1,154 @@
+#ifndef __NET_GENERIC_NETLINK_H
+#define __NET_GENERIC_NETLINK_H
+
+#include <linux/genetlink.h>
+#include <net/netlink.h>
+
+/**
+ * struct genl_family - generic netlink family
+ * @id: protocol family identifier
+ * @hdrsize: length of user specific header in bytes
+ * @name: name of family
+ * @version: protocol version
+ * @maxattr: maximum number of attributes supported
+ * @attrbuf: buffer to store parsed attributes
+ * @ops_list: list of all assigned operations
+ * @family_list: family list
+ */
+struct genl_family
+{
+       unsigned int            id;
+       unsigned int            hdrsize;
+       char                    name[GENL_NAMSIZ];
+       unsigned int            version;
+       unsigned int            maxattr;
+       struct module *         owner;
+       struct nlattr **        attrbuf;        /* private */
+       struct list_head        ops_list;       /* private */
+       struct list_head        family_list;    /* private */
+};
+
+#define GENL_ADMIN_PERM                0x01
+
+/**
+ * struct genl_info - receiving information
+ * @snd_seq: sending sequence number
+ * @snd_pid: netlink pid of sender
+ * @nlhdr: netlink message header
+ * @genlhdr: generic netlink message header
+ * @userhdr: user specific header
+ * @attrs: netlink attributes
+ */
+struct genl_info
+{
+       u32                     snd_seq;
+       u32                     snd_pid;
+       struct nlmsghdr *       nlhdr;
+       struct genlmsghdr *     genlhdr;
+       void *                  userhdr;
+       struct nlattr **        attrs;
+};
+
+/**
+ * struct genl_ops - generic netlink operations
+ * @cmd: command identifier
+ * @flags: flags
+ * @policy: attribute validation policy
+ * @doit: standard command callback
+ * @dumpit: callback for dumpers
+ * @ops_list: operations list
+ */
+struct genl_ops
+{
+       unsigned int            cmd;
+       unsigned int            flags;
+       struct nla_policy       *policy;
+       int                    (*doit)(struct sk_buff *skb,
+                                      struct genl_info *info);
+       int                    (*dumpit)(struct sk_buff *skb,
+                                        struct netlink_callback *cb);
+       struct list_head        ops_list;
+};
+
+extern int genl_register_family(struct genl_family *family);
+extern int genl_unregister_family(struct genl_family *family);
+extern int genl_register_ops(struct genl_family *, struct genl_ops *ops);
+extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops);
+
+extern struct sock *genl_sock;
+
+/**
+ * genlmsg_put - Add generic netlink header to netlink message
+ * @skb: socket buffer holding the message
+ * @pid: netlink pid the message is addressed to
+ * @seq: sequence number (usually the one of the sender)
+ * @type: netlink message type
+ * @hdrlen: length of the user specific header
+ * @flags: netlink message flags
+ * @cmd: generic netlink command
+ * @version: version
+ *
+ * Returns pointer to user specific header
+ */
+static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+                               int type, int hdrlen, int flags,
+                               u8 cmd, u8 version)
+{
+       struct nlmsghdr *nlh;
+       struct genlmsghdr *hdr;
+
+       nlh = nlmsg_put(skb, pid, seq, type, GENL_HDRLEN + hdrlen, flags);
+       if (nlh == NULL)
+               return NULL;
+
+       hdr = nlmsg_data(nlh);
+       hdr->cmd = cmd;
+       hdr->version = version;
+       hdr->reserved = 0;
+
+       return (char *) hdr + GENL_HDRLEN;
+}
+
+/**
+ * genlmsg_end - Finalize a generic netlink message
+ * @skb: socket buffer the message is stored in
+ * @hdr: user specific header
+ */
+static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
+{
+       return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+}
+
+/**
+ * genlmsg_cancel - Cancel construction of a generic netlink message
+ * @skb: socket buffer the message is stored in
+ * @hdr: user specific header
+ */
+static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
+{
+       return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+}
+
+/**
+ * genlmsg_multicast - multicast a netlink message
+ * @skb: netlink message as socket buffer
+ * @pid: own netlink pid to avoid sending to yourself
+ * @group: multicast group id
+ */
+static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
+                                   unsigned int group)
+{
+       return nlmsg_multicast(genl_sock, skb, pid, group);
+}
+
+/**
+ * genlmsg_unicast - unicast a netlink message
+ * @skb: netlink message as socket buffer
+ * @pid: netlink pid of the destination socket
+ */
+static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid)
+{
+       return nlmsg_unicast(genl_sock, skb, pid);
+}
+
+#endif /* __NET_GENERIC_NETLINK_H */
diff --git a/include/net/netfilter/ipv4/nf_conntrack_icmp.h b/include/net/netfilter/ipv4/nf_conntrack_icmp.h
new file mode 100644 (file)
index 0000000..3dd22cf
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _NF_CONNTRACK_ICMP_H
+#define _NF_CONNTRACK_ICMP_H
+/* ICMP tracking. */
+#include <asm/atomic.h>
+
+struct ip_ct_icmp
+{
+       /* Optimization: when number in == number out, forget immediately. */
+       atomic_t count;
+};
+#endif /* _NF_CONNTRACK_ICMP_H */
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
new file mode 100644 (file)
index 0000000..25b081a
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * IPv4 support for nf_conntrack.
+ *
+ * 23 Mar 2004: Yasuyuki Kozakai @ USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - move L3 protocol dependent part from include/linux/netfilter_ipv4/
+ *       ip_conntrack.h
+ */
+
+#ifndef _NF_CONNTRACK_IPV4_H
+#define _NF_CONNTRACK_IPV4_H
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#include <linux/netfilter_ipv4/ip_nat.h>
+
+/* per conntrack: nat application helper private data */
+union ip_conntrack_nat_help {
+        /* insert nat helper private data here */
+};
+
+struct nf_conntrack_ipv4_nat {
+       struct ip_nat_info info;
+       union ip_conntrack_nat_help help;
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+       defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+       int masq_index;
+#endif
+};
+#endif /* CONFIG_IP_NF_NAT_NEEDED */
+
+struct nf_conntrack_ipv4 {
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+       struct nf_conntrack_ipv4_nat *nat;
+#endif
+};
+
+/* Returns new sk_buff, or NULL */
+struct sk_buff *
+nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb);
+
+/* call to create an explicit dependency on nf_conntrack_l3proto_ipv4. */
+extern void need_ip_conntrack(void);
+
+#endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
new file mode 100644 (file)
index 0000000..86591af
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * ICMPv6 tracking.
+ *
+ * 21 Apr 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - separated from nf_conntrack_icmp.h
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_icmp.h
+ */
+
+#ifndef _NF_CONNTRACK_ICMPV6_H
+#define _NF_CONNTRACK_ICMPV6_H
+#include <asm/atomic.h>
+
+#ifndef ICMPV6_NI_QUERY
+#define ICMPV6_NI_QUERY 139
+#endif
+#ifndef ICMPV6_NI_REPLY
+#define ICMPV6_NI_REPLY 140
+#endif
+
+struct nf_ct_icmpv6
+{
+       /* Optimization: when number in == number out, forget immediately. */
+       atomic_t count;
+};
+
+#endif /* _NF_CONNTRACK_ICMPV6_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
new file mode 100644 (file)
index 0000000..cc48256
--- /dev/null
@@ -0,0 +1,354 @@
+/*
+ * Connection state tracking for netfilter.  This is separated from,
+ * but required by, the (future) NAT layer; it can also be used by an iptables
+ * extension.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack.h
+ */
+
+#ifndef _NF_CONNTRACK_H
+#define _NF_CONNTRACK_H
+
+#include <linux/netfilter/nf_conntrack_common.h>
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/nf_conntrack_sctp.h>
+#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+/* per conntrack: protocol private data */
+union nf_conntrack_proto {
+       /* insert conntrack proto private data here */
+       struct ip_ct_sctp sctp;
+       struct ip_ct_tcp tcp;
+       struct ip_ct_icmp icmp;
+       struct nf_ct_icmpv6 icmpv6;
+};
+
+union nf_conntrack_expect_proto {
+       /* insert expect proto private data here */
+};
+
+/* Add protocol helper include file here */
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+/* per conntrack: application helper private data */
+union nf_conntrack_help {
+       /* insert conntrack helper private data (master) here */
+       struct ip_ct_ftp_master ct_ftp_info;
+};
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NF_CT_ASSERT(x)                                                        \
+do {                                                                   \
+       if (!(x))                                                       \
+               /* Wooah!  I'm tripping my conntrack in a frenzy of     \
+                  netplay... */                                        \
+               printk("NF_CT_ASSERT: %s:%i(%s)\n",                     \
+                      __FILE__, __LINE__, __FUNCTION__);               \
+} while(0)
+#else
+#define NF_CT_ASSERT(x)
+#endif
+
+struct nf_conntrack_helper;
+
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+struct nf_conn
+{
+       /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+           plus 1 for any connection(s) we are `master' for */
+       struct nf_conntrack ct_general;
+
+       /* XXX should I move this to the tail ? - Y.K */
+       /* These are my tuples; original and reply */
+       struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
+
+       /* Have we seen traffic both ways yet? (bitset) */
+       unsigned long status;
+
+       /* Timer function; drops refcnt when it goes off. */
+       struct timer_list timeout;
+
+#ifdef CONFIG_NF_CT_ACCT
+       /* Accounting Information (same cache line as other written members) */
+       struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
+#endif
+       /* If we were expected by an expectation, this will be it */
+       struct nf_conn *master;
+       
+       /* Current number of expected connections */
+       unsigned int expecting;
+
+       /* Helper, if any */
+       struct nf_conntrack_helper *helper;
+
+       /* features - nat, helper, ... used by allocating system */
+       u_int32_t features;
+
+       /* Storage reserved for other modules: */
+
+       union nf_conntrack_proto proto;
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+       u_int32_t mark;
+#endif
+
+       /* These members are dynamically allocated. */
+
+       union nf_conntrack_help *help;
+
+       /* Layer 3 dependent members. (ex: NAT) */
+       union {
+               struct nf_conntrack_ipv4 *ipv4;
+       } l3proto;
+       void *data[0];
+};
+
+struct nf_conntrack_expect
+{
+       /* Internal linked list (global expectation list) */
+       struct list_head list;
+
+       /* We expect this tuple, with the following mask */
+       struct nf_conntrack_tuple tuple, mask;
+       /* Function to call after setup and insertion */
+       void (*expectfn)(struct nf_conn *new,
+                        struct nf_conntrack_expect *this);
+
+       /* The conntrack of the master connection */
+       struct nf_conn *master;
+
+       /* Timer function; deletes the expectation. */
+       struct timer_list timeout;
+
+       /* Usage count. */
+       atomic_t use;
+
+       /* Flags */
+       unsigned int flags;
+
+#ifdef CONFIG_NF_NAT_NEEDED
+       /* This is the original per-proto part, used to map the
+        * expected connection the way the recipient expects. */
+       union nf_conntrack_manip_proto saved_proto;
+       /* Direction relative to the master connection. */
+       enum ip_conntrack_dir dir;
+#endif
+};
+
+#define NF_CT_EXPECT_PERMANENT 0x1
+
+static inline struct nf_conn *
+nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
+{
+       return container_of(hash, struct nf_conn,
+                           tuplehash[hash->tuple.dst.dir]);
+}
+
+/* Master conntrack of conntr (its ->master field); conntr may be any expression. */
+#define master_ct(conntr) ((conntr)->master)
+
+/* Alter reply tuple (maybe alter helper). */
+extern void
+nf_conntrack_alter_reply(struct nf_conn *conntrack,
+                        const struct nf_conntrack_tuple *newreply);
+
+/* Is this tuple taken? (ignoring any belonging to the given
+   conntrack). */
+extern int
+nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
+                        const struct nf_conn *ignored_conntrack);
+
+/* Return the conntrack attached to skb; its ctinfo is stored in *ctinfo. */
+static inline struct nf_conn *
+nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+{
+       *ctinfo = skb->nfctinfo;
+       return (struct nf_conn *)skb->nfct;
+}
+
+/* decrement reference count on a conntrack */
+static inline void nf_ct_put(struct nf_conn *ct)
+{
+       NF_CT_ASSERT(ct);
+       nf_conntrack_put(&ct->ct_general);
+}
+
+/* call to create an explicit dependency on nf_conntrack. */
+extern void need_nf_conntrack(void);
+
+extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
+                               const struct nf_conntrack_tuple *orig);
+
+extern void __nf_ct_refresh_acct(struct nf_conn *ct,
+                                enum ip_conntrack_info ctinfo,
+                                const struct sk_buff *skb,
+                                unsigned long extra_jiffies,
+                                int do_acct);
+
+/* Refresh conntrack for this many jiffies and do accounting */
+static inline void nf_ct_refresh_acct(struct nf_conn *ct,
+                                     enum ip_conntrack_info ctinfo,
+                                     const struct sk_buff *skb,
+                                     unsigned long extra_jiffies)
+{
+       __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
+}
+
+/* Refresh conntrack for this many jiffies */
+static inline void nf_ct_refresh(struct nf_conn *ct,
+                                const struct sk_buff *skb,
+                                unsigned long extra_jiffies)
+{
+       __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
+}
+
+/* These are for NAT.  Icky. */
+/* Update TCP window tracking data when NAT mangles the packet */
+extern void nf_conntrack_tcp_update(struct sk_buff *skb,
+                                   unsigned int dataoff,
+                                   struct nf_conn *conntrack,
+                                   int dir);
+
+/* Call me when a conntrack is destroyed. */
+extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
+
+/* Fake conntrack entry for untracked connections */
+extern struct nf_conn nf_conntrack_untracked;
+
+extern int nf_ct_no_defrag;
+
+/* Iterate over all conntracks: if iter returns true, it's deleted. */
+extern void
+nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
+extern void nf_conntrack_free(struct nf_conn *ct);
+extern struct nf_conn *
+nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+                  const struct nf_conntrack_tuple *repl);
+
+/* It's confirmed if it is, or has been in the hash table. */
+static inline int nf_ct_is_confirmed(struct nf_conn *ct)
+{
+       return test_bit(IPS_CONFIRMED_BIT, &ct->status);
+}
+
+static inline int nf_ct_is_dying(struct nf_conn *ct)
+{
+       return test_bit(IPS_DYING_BIT, &ct->status);
+}
+
+extern unsigned int nf_conntrack_htable_size;
+
+#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+
+struct nf_conntrack_ecache {
+       struct nf_conn *ct;
+       unsigned int events;
+};
+DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+
+#define CONNTRACK_ECACHE(x)    (__get_cpu_var(nf_conntrack_ecache).x)
+
+extern struct notifier_block *nf_conntrack_chain;
+extern struct notifier_block *nf_conntrack_expect_chain;
+
+static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_register(&nf_conntrack_chain, nb);
+}
+
+static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+
+static inline int
+nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+
+static inline int
+nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+
+extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
+extern void __nf_ct_event_cache_init(struct nf_conn *ct);
+
+static inline void
+nf_conntrack_event_cache(enum ip_conntrack_events event,
+                        const struct sk_buff *skb)
+{
+       struct nf_conn *ct = (struct nf_conn *)skb->nfct;
+       struct nf_conntrack_ecache *ecache;
+
+       local_bh_disable();
+       ecache = &__get_cpu_var(nf_conntrack_ecache);
+       if (ct != ecache->ct)
+               __nf_ct_event_cache_init(ct);
+       ecache->events |= event;
+       local_bh_enable();
+}
+
+static inline void nf_conntrack_event(enum ip_conntrack_events event,
+                                     struct nf_conn *ct)
+{
+       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
+               notifier_call_chain(&nf_conntrack_chain, event, ct);
+}
+
+static inline void
+nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
+                         struct nf_conntrack_expect *exp)
+{
+       notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
+}
+#else /* CONFIG_NF_CONNTRACK_EVENTS */
+static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
+                                           const struct sk_buff *skb) {}
+static inline void nf_conntrack_event(enum ip_conntrack_events event,
+                                     struct nf_conn *ct) {}
+static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
+static inline void
+nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
+                         struct nf_conntrack_expect *exp) {}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+/* no helper, no nat */
+#define        NF_CT_F_BASIC   0
+/* for helper */
+#define        NF_CT_F_HELP    1
+/* for nat. */
+#define        NF_CT_F_NAT     2
+#define NF_CT_F_NUM    4
+
+extern int
+nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size,
+                           int (*init_conntrack)(struct nf_conn *, u_int32_t));
+extern void
+nf_conntrack_unregister_cache(u_int32_t features);
+
+#endif /* __KERNEL__ */
+#endif /* _NF_CONNTRACK_H */
diff --git a/include/net/netfilter/nf_conntrack_compat.h b/include/net/netfilter/nf_conntrack_compat.h
new file mode 100644 (file)
index 0000000..3cac19f
--- /dev/null
@@ -0,0 +1,108 @@
+#ifndef _NF_CONNTRACK_COMPAT_H
+#define _NF_CONNTRACK_COMPAT_H
+
+#ifdef __KERNEL__
+
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+#ifdef CONFIG_IP_NF_CONNTRACK_MARK
+static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
+                                       u_int32_t *ctinfo)
+{
+       struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
+
+       if (ct)
+               return &ct->mark;
+       else
+               return NULL;
+}
+#endif /* CONFIG_IP_NF_CONNTRACK_MARK */
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+static inline struct ip_conntrack_counter *
+nf_ct_get_counters(const struct sk_buff *skb)
+{
+       enum ip_conntrack_info ctinfo;
+       struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
+
+       if (ct)
+               return ct->counters;
+       else
+               return NULL;
+}
+#endif /* CONFIG_IP_NF_CT_ACCT */
+
+static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+{
+       return (skb->nfct == &ip_conntrack_untracked.ct_general);
+}
+
+static inline void nf_ct_untrack(struct sk_buff *skb)
+{
+       skb->nfct = &ip_conntrack_untracked.ct_general;
+}
+
+static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
+                                  enum ip_conntrack_info *ctinfo)
+{
+       struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
+       return (ct != NULL);
+}
+
+#else /* CONFIG_IP_NF_CONNTRACK */
+
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+
+static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
+                                       u_int32_t *ctinfo)
+{
+       struct nf_conn *ct = nf_ct_get(skb, ctinfo);
+
+       if (ct)
+               return &ct->mark;
+       else
+               return NULL;
+}
+#endif /* CONFIG_NF_CONNTRACK_MARK */
+
+#ifdef CONFIG_NF_CT_ACCT
+static inline struct ip_conntrack_counter *
+nf_ct_get_counters(const struct sk_buff *skb)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+       if (ct)
+               return ct->counters;
+       else
+               return NULL;
+}
+#endif /* CONFIG_NF_CT_ACCT */
+
+static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+{
+       return (skb->nfct == &nf_conntrack_untracked.ct_general);
+}
+
+static inline void nf_ct_untrack(struct sk_buff *skb)
+{
+       skb->nfct = &nf_conntrack_untracked.ct_general;
+}
+
+static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
+                                  enum ip_conntrack_info *ctinfo)
+{
+       struct nf_conn *ct = nf_ct_get(skb, ctinfo);
+       return (ct != NULL);
+}
+
+#endif /* CONFIG_IP_NF_CONNTRACK */
+
+#endif /* __KERNEL__ */
+
+#endif /* _NF_CONNTRACK_COMPAT_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
new file mode 100644 (file)
index 0000000..da25452
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * This header is used to share core functionality between the
+ * standalone connection tracking module, and the compatibility layer's use
+ * of connection tracking.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_core.h
+ */
+
+#ifndef _NF_CONNTRACK_CORE_H
+#define _NF_CONNTRACK_CORE_H
+
+#include <linux/netfilter.h>
+
+/* This header is used to share core functionality between the
+   standalone connection tracking module, and the compatibility layer's use
+   of connection tracking. */
+extern unsigned int nf_conntrack_in(int pf,
+                                   unsigned int hooknum,
+                                   struct sk_buff **pskb);
+
+extern int nf_conntrack_init(void);
+extern void nf_conntrack_cleanup(void);
+
+struct nf_conntrack_l3proto;
+extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf);
+/* Like above, but you already have conntrack read lock. */
+extern struct nf_conntrack_l3proto *__nf_ct_find_l3proto(u_int16_t l3proto);
+
+struct nf_conntrack_protocol;
+
+extern int
+nf_ct_get_tuple(const struct sk_buff *skb,
+               unsigned int nhoff,
+               unsigned int dataoff,
+               u_int16_t l3num,
+               u_int8_t protonum,
+               struct nf_conntrack_tuple *tuple,
+               const struct nf_conntrack_l3proto *l3proto,
+               const struct nf_conntrack_protocol *protocol);
+
+extern int
+nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
+                  const struct nf_conntrack_tuple *orig,
+                  const struct nf_conntrack_l3proto *l3proto,
+                  const struct nf_conntrack_protocol *protocol);
+
+/* Find a connection corresponding to a tuple. */
+extern struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
+                     const struct nf_conn *ignored_conntrack);
+
+extern int __nf_conntrack_confirm(struct sk_buff **pskb);
+
+/* Confirm a connection: returns NF_DROP if packet must be dropped. */
+static inline int nf_conntrack_confirm(struct sk_buff **pskb)
+{
+       struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
+       int ret = NF_ACCEPT;
+
+       if (ct) {
+               if (!nf_ct_is_confirmed(ct))
+                       ret = __nf_conntrack_confirm(pskb);
+               nf_ct_deliver_cached_events(ct);
+       }
+       return ret;
+}
+
+extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb);
+
+extern struct list_head *nf_conntrack_hash;
+extern struct list_head nf_conntrack_expect_list;
+extern rwlock_t nf_conntrack_lock ;
+#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
new file mode 100644 (file)
index 0000000..5a66b2a
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * connection tracking helpers.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_helper.h
+ */
+
+#ifndef _NF_CONNTRACK_HELPER_H
+#define _NF_CONNTRACK_HELPER_H
+#include <net/netfilter/nf_conntrack.h>
+
+struct module;
+
+struct nf_conntrack_helper
+{      
+       struct list_head list;          /* Internal use. */
+
+       const char *name;               /* name of the module */
+       struct module *me;              /* pointer to self */
+       unsigned int max_expected;      /* Maximum number of concurrent 
+                                        * expected connections */
+       unsigned int timeout;           /* timeout for expecteds */
+
+       /* Mask of things we will help (compared against server response) */
+       struct nf_conntrack_tuple tuple;
+       struct nf_conntrack_tuple mask;
+       
+       /* Function to call when data passes; return verdict, or -1 to
+           invalidate. */
+       int (*help)(struct sk_buff **pskb,
+                   unsigned int protoff,
+                   struct nf_conn *ct,
+                   enum ip_conntrack_info conntrackinfo);
+};
+
+extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
+extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
+
+/* Allocate space for an expectation: this is mandatory before calling
+   nf_conntrack_expect_related.  You will have to call put afterwards. */
+extern struct nf_conntrack_expect *
+nf_conntrack_expect_alloc(struct nf_conn *master);
+extern void nf_conntrack_expect_put(struct nf_conntrack_expect *exp);
+
+/* Add an expected connection: can have more than one per connection */
+extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp);
+extern void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp);
+
+#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
new file mode 100644 (file)
index 0000000..01663e5
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * Header for use in defining a given L3 protocol for connection tracking.
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+ */
+
+#ifndef _NF_CONNTRACK_L3PROTO_H
+#define _NF_CONNTRACK_L3PROTO_H
+#include <linux/seq_file.h>
+#include <net/netfilter/nf_conntrack.h>
+
+struct nf_conntrack_l3proto
+{
+       /* Next pointer. */
+       struct list_head list;
+
+       /* L3 Protocol Family number. ex) PF_INET */
+       u_int16_t l3proto;
+
+       /* Protocol name */
+       const char *name;
+
+       /*
+        * Try to fill in the third arg: nhoff is offset of l3 proto
+         * hdr.  Return true if possible.
+        */
+       int (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
+                           struct nf_conntrack_tuple *tuple);
+
+       /*
+        * Invert the per-proto part of the tuple: ie. turn xmit into reply.
+        * Some packets can't be inverted: return 0 in that case.
+        */
+       int (*invert_tuple)(struct nf_conntrack_tuple *inverse,
+                           const struct nf_conntrack_tuple *orig);
+
+       /* Print out the per-protocol part of the tuple. */
+       int (*print_tuple)(struct seq_file *s,
+                          const struct nf_conntrack_tuple *);
+
+       /* Print out the private part of the conntrack. */
+       int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
+
+       /* Returns verdict for packet, or -1 for invalid. */
+       int (*packet)(struct nf_conn *conntrack,
+                     const struct sk_buff *skb,
+                     enum ip_conntrack_info ctinfo);
+
+       /*
+        * Called when a new connection for this protocol found;
+        * returns TRUE if it's OK.  If so, packet() called next.
+        */
+       int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb);
+
+       /* Called when a conntrack entry is destroyed */
+       void (*destroy)(struct nf_conn *conntrack);
+
+       /*
+        * Called before tracking. 
+        *      *dataoff: offset of protocol header (TCP, UDP,...) in *pskb
+        *      *protonum: protocol number
+        */
+       int (*prepare)(struct sk_buff **pskb, unsigned int hooknum,
+                      unsigned int *dataoff, u_int8_t *protonum);
+
+       u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple);
+
+       /* Module (if any) which this is connected to. */
+       struct module *me;
+};
+
+extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+
+/* Protocol registration. */
+extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
+extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+
+/* Existing built-in protocols */
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
+extern struct nf_conntrack_l3proto nf_conntrack_generic_l3proto;
+/* Look up the L3 tracker for a family; families >= AF_MAX get the generic one. */
+static inline struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t l3proto)
+{
+       return (l3proto < AF_MAX) ? nf_ct_l3protos[l3proto]
+                                 : &nf_conntrack_generic_l3proto;
+}
+#endif /*_NF_CONNTRACK_L3PROTO_H*/
diff --git a/include/net/netfilter/nf_conntrack_protocol.h b/include/net/netfilter/nf_conntrack_protocol.h
new file mode 100644 (file)
index 0000000..b3afda3
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Header for use in defining a given protocol for connection tracking.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalized L3 protocol dependent part.
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+ */
+
+#ifndef _NF_CONNTRACK_PROTOCOL_H
+#define _NF_CONNTRACK_PROTOCOL_H
+#include <net/netfilter/nf_conntrack.h>
+
+struct seq_file;
+
+struct nf_conntrack_protocol
+{
+       /* Next pointer. */
+       struct list_head list;
+
+       /* L3 Protocol number. */
+       u_int16_t l3proto;
+
+       /* Protocol number. */
+       u_int8_t proto;
+
+       /* Protocol name */
+       const char *name;
+
+       /* Try to fill in the third arg: dataoff is offset past network protocol
+           hdr.  Return true if possible. */
+       int (*pkt_to_tuple)(const struct sk_buff *skb,
+                           unsigned int dataoff,
+                           struct nf_conntrack_tuple *tuple);
+
+       /* Invert the per-proto part of the tuple: ie. turn xmit into reply.
+        * Some packets can't be inverted: return 0 in that case.
+        */
+       int (*invert_tuple)(struct nf_conntrack_tuple *inverse,
+                           const struct nf_conntrack_tuple *orig);
+
+       /* Print out the per-protocol part of the tuple. Return like seq_* */
+       int (*print_tuple)(struct seq_file *s,
+                          const struct nf_conntrack_tuple *);
+
+       /* Print out the private part of the conntrack. */
+       int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
+
+       /* Returns verdict for packet, or -1 for invalid. */
+       int (*packet)(struct nf_conn *conntrack,
+                     const struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info ctinfo,
+                     int pf,
+                     unsigned int hooknum);
+
+       /* Called when a new connection for this protocol found;
+        * returns TRUE if it's OK.  If so, packet() called next. */
+       int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb,
+                  unsigned int dataoff);
+
+       /* Called when a conntrack entry is destroyed */
+       void (*destroy)(struct nf_conn *conntrack);
+
+       int (*error)(struct sk_buff *skb, unsigned int dataoff,
+                    enum ip_conntrack_info *ctinfo,
+                    int pf, unsigned int hooknum);
+
+       /* Module (if any) which this is connected to. */
+       struct module *me;
+};
+
+/* Existing built-in protocols */
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
+extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
+
+#define MAX_NF_CT_PROTO 256
+extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
+
+extern struct nf_conntrack_protocol *
+nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol);
+
+/* Protocol registration. */
+extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto);
+extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto);
+
+/* Log invalid packets */
+extern unsigned int nf_ct_log_invalid;
+
+#ifdef CONFIG_SYSCTL
+#ifdef DEBUG_INVALID_PACKETS
+#define LOG_INVALID(proto) \
+       (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#else
+#define LOG_INVALID(proto) \
+       ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+        && net_ratelimit())
+#endif
+#else
+#define LOG_INVALID(proto) 0
+#endif /* CONFIG_SYSCTL */
+
+#endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
new file mode 100644 (file)
index 0000000..14ce790
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Definitions and Declarations for tuple.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ *
+ * Derived from include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+ */
+
+#ifndef _NF_CONNTRACK_TUPLE_H
+#define _NF_CONNTRACK_TUPLE_H
+
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
+/* A `tuple' is a structure containing the information to uniquely
+  identify a connection.  ie. if two packets have the same tuple, they
+  are in the same connection; if not, they are not.
+
+  We divide the structure along "manipulatable" and
+  "non-manipulatable" lines, for the benefit of the NAT code.
+*/
+
+#define NF_CT_TUPLE_L3SIZE     4
+
+/* The l3 protocol-specific manipulable parts of the tuple: always in
+   network order! */
+union nf_conntrack_man_l3proto {
+       u_int32_t all[NF_CT_TUPLE_L3SIZE];
+       u_int32_t ip;
+       u_int32_t ip6[4];
+};
+
+/* The protocol-specific manipulable parts of the tuple: always in
+   network order! */
+union nf_conntrack_man_proto
+{
+       /* Add other protocols here. */
+       u_int16_t all;
+
+       struct {
+               u_int16_t port;
+       } tcp;
+       struct {
+               u_int16_t port;
+       } udp;
+       struct {
+               u_int16_t id;
+       } icmp;
+       struct {
+               u_int16_t port;
+       } sctp;
+};
+
+/* The manipulable part of the tuple. */
+struct nf_conntrack_man
+{
+       union nf_conntrack_man_l3proto u3;
+       union nf_conntrack_man_proto u;
+       /* Layer 3 protocol */
+       u_int16_t l3num;
+};
+
+/* This contains the information to distinguish a connection. */
+struct nf_conntrack_tuple
+{
+       struct nf_conntrack_man src;
+
+       /* These are the parts of the tuple which are fixed. */
+       struct {
+               union {
+                       u_int32_t all[NF_CT_TUPLE_L3SIZE];
+                       u_int32_t ip;
+                       u_int32_t ip6[4];
+               } u3;
+               union {
+                       /* Add other protocols here. */
+                       u_int16_t all;
+
+                       struct {
+                               u_int16_t port;
+                       } tcp;
+                       struct {
+                               u_int16_t port;
+                       } udp;
+                       struct {
+                               u_int8_t type, code;
+                       } icmp;
+                       struct {
+                               u_int16_t port;
+                       } sctp;
+               } u;
+
+               /* The protocol. */
+               u_int8_t protonum;
+
+               /* The direction (for tuplehash) */
+               u_int8_t dir;
+       } dst;
+};
+
+/* This is cheaper than a memset of the whole structure: the only parts we
+ * really care about are the source/destination unions. */
+#define NF_CT_TUPLE_U_BLANK(tuple)                                     \
+        do {                                                           \
+                (tuple)->src.u.all = 0;                                \
+                (tuple)->dst.u.all = 0;                                \
+               memset(&(tuple)->src.u3, 0, sizeof((tuple)->src.u3));   \
+               memset(&(tuple)->dst.u3, 0, sizeof((tuple)->dst.u3));   \
+        } while (0)
+
+#ifdef __KERNEL__
+
+#define NF_CT_DUMP_TUPLE(tp)                                               \
+DEBUGP("tuple %p: %u %u %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu -> %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu\n",                                     \
+       (tp), (tp)->src.l3num, (tp)->dst.protonum,                          \
+       NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \
+       NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all))
+
+/* If we're the first tuple, it's the original dir. */
+#define NF_CT_DIRECTION(h)                                             \
+       ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
+
+/* Connections have two entries in the hash table: one for each way */
+struct nf_conntrack_tuple_hash
+{
+       struct list_head list;
+
+       struct nf_conntrack_tuple tuple;
+};
+
+#endif /* __KERNEL__ */
+
+static inline int nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1,
+                                       const struct nf_conntrack_tuple *t2)
+{ 
+       return (t1->src.u3.all[0] == t2->src.u3.all[0] &&
+               t1->src.u3.all[1] == t2->src.u3.all[1] &&
+               t1->src.u3.all[2] == t2->src.u3.all[2] &&
+               t1->src.u3.all[3] == t2->src.u3.all[3] &&
+               t1->src.u.all == t2->src.u.all &&
+               t1->src.l3num == t2->src.l3num &&
+               t1->dst.protonum == t2->dst.protonum);
+}
+
+/*
+ * Compare the destination halves of two tuples: all four dst.u3 words,
+ * dst.u.all and dst.protonum.  The L3 protocol number is compared through
+ * src.l3num because the dst part of the tuple has no l3num field of its own.
+ * Returns non-zero when everything matches.
+ */
+static inline int nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1,
+                                       const struct nf_conntrack_tuple *t2)
+{
+       return (t1->dst.u3.all[0] == t2->dst.u3.all[0] &&
+               t1->dst.u3.all[1] == t2->dst.u3.all[1] &&
+               t1->dst.u3.all[2] == t2->dst.u3.all[2] &&
+               t1->dst.u3.all[3] == t2->dst.u3.all[3] &&
+               t1->dst.u.all == t2->dst.u.all &&
+               t1->src.l3num == t2->src.l3num &&
+               t1->dst.protonum == t2->dst.protonum);
+}
+
+static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1,
+                                   const struct nf_conntrack_tuple *t2)
+{
+       return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2);
+}
+
+/*
+ * Masked tuple comparison.  Returns 1 if t and tuple agree on every bit
+ * selected by mask, 0 as soon as one masked bit differs.
+ */
+static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t,
+                                      const struct nf_conntrack_tuple *tuple,
+                                      const struct nf_conntrack_tuple *mask)
+{
+       int i;
+
+       /* L3 address words, source and destination side by side. */
+       for (i = 0; i < NF_CT_TUPLE_L3SIZE; i++) {
+               if (mask->src.u3.all[i] &
+                   (t->src.u3.all[i] ^ tuple->src.u3.all[i]))
+                       return 0;
+               if (mask->dst.u3.all[i] &
+                   (t->dst.u3.all[i] ^ tuple->dst.u3.all[i]))
+                       return 0;
+       }
+
+       /* Per-protocol parts, then the L3 and L4 protocol numbers. */
+       if (mask->src.u.all & (t->src.u.all ^ tuple->src.u.all))
+               return 0;
+       if (mask->dst.u.all & (t->dst.u.all ^ tuple->dst.u.all))
+               return 0;
+       if (mask->src.l3num & (t->src.l3num ^ tuple->src.l3num))
+               return 0;
+       if (mask->dst.protonum & (t->dst.protonum ^ tuple->dst.protonum))
+               return 0;
+
+       return 1;
+}
+
+#endif /* _NF_CONNTRACK_TUPLE_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
new file mode 100644 (file)
index 0000000..640c26a
--- /dev/null
@@ -0,0 +1,883 @@
+#ifndef __NET_NETLINK_H
+#define __NET_NETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* ========================================================================
+ *         Netlink Messages and Attributes Interface (As Seen On TV)
+ * ------------------------------------------------------------------------
+ *                          Messages Interface
+ * ------------------------------------------------------------------------
+ *
+ * Message Format:
+ *    <--- nlmsg_total_size(payload)  --->
+ *    <-- nlmsg_msg_size(payload) ->
+ *   +----------+- - -+-------------+- - -+-------- - -
+ *   | nlmsghdr | Pad |   Payload   | Pad | nlmsghdr
+ *   +----------+- - -+-------------+- - -+-------- - -
+ *   nlmsg_data(nlh)---^                   ^
+ *   nlmsg_next(nlh)-----------------------+
+ *
+ * Payload Format:
+ *    <---------------------- nlmsg_len(nlh) --------------------->
+ *    <------ hdrlen ------>       <- nlmsg_attrlen(nlh, hdrlen) ->
+ *   +----------------------+- - -+--------------------------------+
+ *   |     Family Header    | Pad |           Attributes           |
+ *   +----------------------+- - -+--------------------------------+
+ *   nlmsg_attrdata(nlh, hdrlen)---^
+ *
+ * Data Structures:
+ *   struct nlmsghdr                   netlink message header
+ *
+ * Message Construction:
+ *   nlmsg_new()                       create a new netlink message
+ *   nlmsg_put()                       add a netlink message to an skb
+ *   nlmsg_put_answer()                        callback based nlmsg_put()
+ *   nlmsg_end()                       finalize netlink message
+ *   nlmsg_cancel()                    cancel message construction
+ *   nlmsg_free()                      free a netlink message
+ *
+ * Message Sending:
+ *   nlmsg_multicast()                 multicast message to several groups
+ *   nlmsg_unicast()                   unicast a message to a single socket
+ *
+ * Message Length Calculations:
+ *   nlmsg_msg_size(payload)           length of message w/o padding
+ *   nlmsg_total_size(payload)         length of message w/ padding
+ *   nlmsg_padlen(payload)             length of padding at tail
+ *
+ * Message Payload Access:
+ *   nlmsg_data(nlh)                   head of message payload
+ *   nlmsg_len(nlh)                    length of message payload
+ *   nlmsg_attrdata(nlh, hdrlen)       head of attributes data
+ *   nlmsg_attrlen(nlh, hdrlen)                length of attributes data
+ *
+ * Message Parsing:
+ *   nlmsg_ok(nlh, remaining)          does nlh fit into remaining bytes?
+ *   nlmsg_next(nlh, remaining)                get next netlink message
+ *   nlmsg_parse()                     parse attributes of a message
+ *   nlmsg_find_attr()                 find an attribute in a message
+ *   nlmsg_for_each_msg()              loop over all messages
+ *   nlmsg_validate()                  validate netlink message incl. attrs
+ *   nlmsg_for_each_attr()             loop over all attributes
+ *
+ * ------------------------------------------------------------------------
+ *                          Attributes Interface
+ * ------------------------------------------------------------------------
+ *
+ * Attribute Format:
+ *    <------- nla_total_size(payload) ------->
+ *    <---- nla_attr_size(payload) ----->
+ *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
+ *   |  Header  | Pad |     Payload      | Pad |  Header
+ *   +----------+- - -+- - - - - - - - - +- - -+-------- - -
+ *                     <- nla_len(nla) ->      ^
+ *   nla_data(nla)----^                        |
+ *   nla_next(nla)-----------------------------'
+ *
+ * Data Structures:
+ *   struct nlattr                     netlink attribute header
+ *
+ * Attribute Construction:
+ *   nla_reserve(skb, type, len)       reserve skb tailroom for an attribute
+ *   nla_put(skb, type, len, data)     add attribute to skb
+ *
+ * Attribute Construction for Basic Types:
+ *   nla_put_u8(skb, type, value)      add u8 attribute to skb
+ *   nla_put_u16(skb, type, value)     add u16 attribute to skb
+ *   nla_put_u32(skb, type, value)     add u32 attribute to skb
+ *   nla_put_u64(skb, type, value)     add u64 attribute to skb
+ *   nla_put_string(skb, type, str)    add string attribute to skb
+ *   nla_put_flag(skb, type)           add flag attribute to skb
+ *   nla_put_msecs(skb, type, jiffies) add msecs attribute to skb
+ *
+ * Exceptions Based Attribute Construction:
+ *   NLA_PUT(skb, type, len, data)     add attribute to skb
+ *   NLA_PUT_U8(skb, type, value)      add u8 attribute to skb
+ *   NLA_PUT_U16(skb, type, value)     add u16 attribute to skb
+ *   NLA_PUT_U32(skb, type, value)     add u32 attribute to skb
+ *   NLA_PUT_U64(skb, type, value)     add u64 attribute to skb
+ *   NLA_PUT_STRING(skb, type, str)    add string attribute to skb
+ *   NLA_PUT_FLAG(skb, type)           add flag attribute to skb
+ *   NLA_PUT_MSECS(skb, type, jiffies) add msecs attribute to skb
+ *
+ *   The meaning of these functions is equal to their lower case
+ *   variants but they jump to the label nla_put_failure in case
+ *   of a failure.
+ *
+ * Nested Attributes Construction:
+ *   nla_nest_start(skb, type)         start a nested attribute
+ *   nla_nest_end(skb, nla)            finalize a nested attribute
+ *   nla_nest_cancel(skb, nla)         cancel nested attribute construction
+ *
+ * Attribute Length Calculations:
+ *   nla_attr_size(payload)            length of attribute w/o padding
+ *   nla_total_size(payload)           length of attribute w/ padding
+ *   nla_padlen(payload)               length of padding
+ *
+ * Attribute Payload Access:
+ *   nla_data(nla)                     head of attribute payload
+ *   nla_len(nla)                      length of attribute payload
+ *
+ * Attribute Payload Access for Basic Types:
+ *   nla_get_u8(nla)                   get payload for a u8 attribute
+ *   nla_get_u16(nla)                  get payload for a u16 attribute
+ *   nla_get_u32(nla)                  get payload for a u32 attribute
+ *   nla_get_u64(nla)                  get payload for a u64 attribute
+ *   nla_get_flag(nla)                 return 1 if flag is true
+ *   nla_get_msecs(nla)                        get payload for a msecs attribute
+ *
+ * Attribute Misc:
+ *   nla_memcpy(dest, nla, count)      copy attribute into memory
+ *   nla_memcmp(nla, data, size)       compare attribute with memory area
+ *   nla_strlcpy(dst, nla, size)       copy attribute to a sized string
+ *   nla_strcmp(nla, str)              compare attribute with string
+ *
+ * Attribute Parsing:
+ *   nla_ok(nla, remaining)            does nla fit into remaining bytes?
+ *   nla_next(nla, remaining)          get next netlink attribute
+ *   nla_validate()                    validate a stream of attributes
+ *   nla_find()                                find attribute in stream of attributes
+ *   nla_parse()                       parse and validate stream of attrs
+ *   nla_parse_nested()                        parse nested attributes
+ *   nla_for_each_attr()               loop over all attributes
+ *=========================================================================
+ */
+
+ /**
+  * Standard attribute types to specify validation policy
+  */
+enum {
+       NLA_UNSPEC,
+       NLA_U8,
+       NLA_U16,
+       NLA_U32,
+       NLA_U64,
+       NLA_STRING,
+       NLA_FLAG,
+       NLA_MSECS,
+       NLA_NESTED,
+       __NLA_TYPE_MAX,
+};
+
+#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+
+/**
+ * struct nla_policy - attribute validation policy
+ * @type: Type of attribute or NLA_UNSPEC
+ * @minlen: Minimal length of payload required to be available
+ *
+ * Policies are defined as arrays of this struct, the array must be
+ * accessible by attribute type up to the highest identifier to be expected.
+ *
+ * Example:
+ * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
+ *     [ATTR_FOO] = { .type = NLA_U16 },
+ *     [ATTR_BAR] = { .type = NLA_STRING },
+ *     [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
+ * };
+ */
+struct nla_policy {
+       u16             type;
+       u16             minlen;
+};
+
+extern void            netlink_run_queue(struct sock *sk, unsigned int *qlen,
+                                         int (*cb)(struct sk_buff *,
+                                                   struct nlmsghdr *, int *));
+extern void            netlink_queue_skip(struct nlmsghdr *nlh,
+                                          struct sk_buff *skb);
+
+extern int             nla_validate(struct nlattr *head, int len, int maxtype,
+                                    struct nla_policy *policy);
+extern int             nla_parse(struct nlattr *tb[], int maxtype,
+                                 struct nlattr *head, int len,
+                                 struct nla_policy *policy);
+extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype);
+extern size_t          nla_strlcpy(char *dst, const struct nlattr *nla,
+                                   size_t dstsize);
+extern int             nla_memcpy(void *dest, struct nlattr *src, int count);
+extern int             nla_memcmp(const struct nlattr *nla, const void *data,
+                                  size_t size);
+extern int             nla_strcmp(const struct nlattr *nla, const char *str);
+extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype,
+                                     int attrlen);
+extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype,
+                                   int attrlen);
+extern void            __nla_put(struct sk_buff *skb, int attrtype,
+                                 int attrlen, const void *data);
+extern int             nla_put(struct sk_buff *skb, int attrtype,
+                               int attrlen, const void *data);
+
+/**************************************************************************
+ * Netlink Messages
+ **************************************************************************/
+
+/**
+ * nlmsg_msg_size - length of netlink message not including padding
+ * @payload: length of message payload
+ */
+static inline int nlmsg_msg_size(int payload)
+{
+       return NLMSG_HDRLEN + payload;
+}
+
+/**
+ * nlmsg_total_size - length of netlink message including padding
+ * @payload: length of message payload
+ */
+static inline int nlmsg_total_size(int payload)
+{
+       return NLMSG_ALIGN(nlmsg_msg_size(payload));
+}
+
+/**
+ * nlmsg_padlen - length of padding at the message's tail
+ * @payload: length of message payload
+ */
+static inline int nlmsg_padlen(int payload)
+{
+       return nlmsg_total_size(payload) - nlmsg_msg_size(payload);
+}
+
+/**
+ * nlmsg_data - head of message payload
+ * @nlh: netlink message header
+ */
+static inline void *nlmsg_data(const struct nlmsghdr *nlh)
+{
+       return (unsigned char *) nlh + NLMSG_HDRLEN;
+}
+
+/**
+ * nlmsg_len - length of message payload
+ * @nlh: netlink message header
+ */
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+       return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * nlmsg_attrdata - head of attributes data
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ */
+static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh,
+                                           int hdrlen)
+{
+       unsigned char *data = nlmsg_data(nlh);
+       return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen));
+}
+
+/**
+ * nlmsg_attrlen - length of attributes data
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ */
+static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen)
+{
+       return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen);
+}
+
+/**
+ * nlmsg_ok - check if the netlink message fits into the remaining bytes
+ * @nlh: netlink message header
+ * @remaining: number of bytes remaining in message stream
+ *
+ * Returns non-zero if at least a full header is left in the stream and the
+ * length announced in that header is no smaller than a header and no larger
+ * than @remaining; returns 0 otherwise.
+ */
+static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
+{
+       /*
+        * sizeof() yields an unsigned size_t, so without the cast a negative
+        * remaining (nlmsg_next() subtracts the aligned length and can go
+        * below zero) would be converted to a huge unsigned value and pass.
+        */
+       return (remaining >= (int) sizeof(struct nlmsghdr) &&
+               nlh->nlmsg_len >= sizeof(struct nlmsghdr) &&
+               nlh->nlmsg_len <= remaining);
+}
+
+/**
+ * nlmsg_next - next netlink message in message stream
+ * @nlh: netlink message header
+ * @remaining: number of bytes remaining in message stream
+ *
+ * Returns the next netlink message in the message stream and
+ * decrements remaining by the size of the current message.
+ */
+static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining)
+{
+       int totlen = NLMSG_ALIGN(nlh->nlmsg_len);
+
+       *remaining -= totlen;
+
+       return (struct nlmsghdr *) ((unsigned char *) nlh + totlen);
+}
+
+/**
+ * nlmsg_parse - parse attributes of a netlink message
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * See nla_parse()
+ */
+static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen,
+                             struct nlattr *tb[], int maxtype,
+                             struct nla_policy *policy)
+{
+       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+               return -EINVAL;
+
+       return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
+                        nlmsg_attrlen(nlh, hdrlen), policy);
+}
+
+/**
+ * nlmsg_find_attr - find a specific attribute in a netlink message
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute which matches the specified type.
+ */
+static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
+                                            int hdrlen, int attrtype)
+{
+       return nla_find(nlmsg_attrdata(nlh, hdrlen),
+                       nlmsg_attrlen(nlh, hdrlen), attrtype);
+}
+
+/**
+ * nlmsg_validate - validate a netlink message including attributes
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ */
+static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype,
+                                struct nla_policy *policy)
+{
+       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+               return -EINVAL;
+
+       return nla_validate(nlmsg_attrdata(nlh, hdrlen),
+                           nlmsg_attrlen(nlh, hdrlen), maxtype, policy);
+}
+
+/**
+ * nlmsg_for_each_attr - iterate over a stream of attributes
+ * @pos: loop counter, set to current attribute
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \
+       nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \
+                         nlmsg_attrlen(nlh, hdrlen), rem)
+
+#if 0
+/* FIXME: Enable once all users have been converted */
+
+/**
+ * __nlmsg_put - Add a new netlink message to an skb
+ * @skb: socket buffer to store message in
+ * @pid: netlink process id
+ * @seq: sequence number of message
+ * @type: message type
+ * @payload: length of message payload
+ * @flags: message flags
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for both the netlink header and payload.
+ */
+static inline struct nlmsghdr *__nlmsg_put(struct sk_buff *skb, u32 pid,
+                                          u32 seq, int type, int payload,
+                                          int flags)
+{
+       struct nlmsghdr *nlh;
+
+       nlh = (struct nlmsghdr *) skb_put(skb, nlmsg_total_size(payload));
+       nlh->nlmsg_type = type;
+       nlh->nlmsg_len = nlmsg_msg_size(payload);
+       nlh->nlmsg_flags = flags;
+       nlh->nlmsg_pid = pid;
+       nlh->nlmsg_seq = seq;
+
+       memset((unsigned char *) nlmsg_data(nlh) + payload, 0,
+              nlmsg_padlen(payload));
+
+       return nlh;
+}
+#endif
+
+/**
+ * nlmsg_put - Add a new netlink message to an skb
+ * @skb: socket buffer to store message in
+ * @pid: netlink process id
+ * @seq: sequence number of message
+ * @type: message type
+ * @payload: length of message payload
+ * @flags: message flags
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the message header and payload (including alignment padding, see
+ * nlmsg_total_size()); otherwise returns the result of __nlmsg_put().
+ *
+ * NOTE(review): the __nlmsg_put() definition in this header is compiled
+ * out with #if 0, so the one called here presumably comes from
+ * <linux/netlink.h> - confirm.
+ */
+static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+                                        int type, int payload, int flags)
+{
+       if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload)))
+               return NULL;
+
+       return __nlmsg_put(skb, pid, seq, type, payload, flags);
+}
+
+/**
+ * nlmsg_put_answer - Add a new callback based netlink message to an skb
+ * @skb: socket buffer to store message in
+ * @cb: netlink callback
+ * @type: message type
+ * @payload: length of message payload
+ * @flags: message flags
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the message header and payload.
+ */
+static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
+                                               struct netlink_callback *cb,
+                                               int type, int payload,
+                                               int flags)
+{
+       return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+                        type, payload, flags);
+}
+
+/**
+ * nlmsg_new - Allocate a new netlink message
+ * @size: maximum size of message
+ *
+ * Use NLMSG_GOODSIZE if size isn't known and you need a good default size.
+ *
+ * Allocates with GFP_KERNEL and returns whatever alloc_skb() returns.
+ */
+static inline struct sk_buff *nlmsg_new(int size)
+{
+       /* Honour the requested size; NLMSG_GOODSIZE is only a suggestion. */
+       return alloc_skb(size, GFP_KERNEL);
+}
+
+/**
+ * nlmsg_end - Finalize a netlink message
+ * @skb: socket buffer the message is stored in
+ * @nlh: netlink message header
+ *
+ * Corrects the netlink message header to include the appended
+ * attributes. Only necessary if attributes have been added to
+ * the message.
+ *
+ * Returns the total data length of the skb.
+ */
+static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       nlh->nlmsg_len = skb->tail - (unsigned char *) nlh;
+
+       return skb->len;
+}
+
+/**
+ * nlmsg_cancel - Cancel construction of a netlink message
+ * @skb: socket buffer the message is stored in
+ * @nlh: netlink message header
+ *
+ * Removes the complete netlink message including all
+ * attributes from the socket buffer again. Returns -1.
+ */
+static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       skb_trim(skb, (unsigned char *) nlh - skb->data);
+
+       return -1;
+}
+
+/**
+ * nlmsg_free - free a netlink message
+ * @skb: socket buffer of netlink message
+ */
+static inline void nlmsg_free(struct sk_buff *skb)
+{
+       kfree_skb(skb);
+}
+
+/**
+ * nlmsg_multicast - multicast a netlink message
+ * @sk: netlink socket to spread messages to
+ * @skb: netlink message as socket buffer
+ * @pid: own netlink pid to avoid sending to yourself
+ * @group: multicast group id
+ *
+ * Records @group as the destination group in the skb's control block and
+ * hands the skb to netlink_broadcast() with GFP_KERNEL.  A positive result
+ * is reported as 0; zero and negative results are passed through.
+ */
+static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
+                                 u32 pid, unsigned int group)
+{
+       int rc;
+
+       NETLINK_CB(skb).dst_group = group;
+       rc = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL);
+
+       return rc > 0 ? 0 : rc;
+}
+
+/**
+ * nlmsg_unicast - unicast a netlink message
+ * @sk: netlink socket to spread message to
+ * @skb: netlink message as socket buffer
+ * @pid: netlink pid of the destination socket
+ *
+ * Passes MSG_DONTWAIT to netlink_unicast().  A positive result is
+ * reported as 0; zero and negative results are passed through.
+ */
+static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid)
+{
+       int rc = netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
+
+       return rc > 0 ? 0 : rc;
+}
+
+/**
+ * nlmsg_for_each_msg - iterate over a stream of messages
+ * @pos: loop counter, set to current message
+ * @head: head of message stream
+ * @len: length of message stream
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nlmsg_for_each_msg(pos, head, len, rem) \
+       for (pos = head, rem = len; \
+            nlmsg_ok(pos, rem); \
+            pos = nlmsg_next(pos, &(rem)))
+
+/**************************************************************************
+ * Netlink Attributes
+ **************************************************************************/
+
+/**
+ * nla_attr_size - length of attribute not including padding
+ * @payload: length of payload
+ */
+static inline int nla_attr_size(int payload)
+{
+       return NLA_HDRLEN + payload;
+}
+
+/**
+ * nla_total_size - total length of attribute including padding
+ * @payload: length of payload
+ */
+static inline int nla_total_size(int payload)
+{
+       return NLA_ALIGN(nla_attr_size(payload));
+}
+
+/**
+ * nla_padlen - number of padding bytes following an attribute
+ * @payload: length of payload
+ *
+ * Returns the difference between the padded and the unpadded
+ * attribute size for the given payload length.
+ */
+static inline int nla_padlen(int payload)
+{
+       int padded = nla_total_size(payload);
+       int unpadded = nla_attr_size(payload);
+
+       return padded - unpadded;
+}
+
+/**
+ * nla_data - start of the attribute payload
+ * @nla: netlink attribute
+ *
+ * Returns a pointer NLA_HDRLEN bytes past the start of @nla.
+ */
+static inline void *nla_data(const struct nlattr *nla)
+{
+       char *attr_start = (char *) nla;
+
+       return attr_start + NLA_HDRLEN;
+}
+
+/**
+ * nla_len - payload length of an attribute
+ * @nla: netlink attribute
+ *
+ * Returns the nla_len header field minus NLA_HDRLEN.
+ */
+static inline int nla_len(const struct nlattr *nla)
+{
+       int attr_len = nla->nla_len;
+
+       return attr_len - NLA_HDRLEN;
+}
+
+/**
+ * nla_ok - check if the netlink attribute fits into the remaining bytes
+ * @nla: netlink attribute
+ * @remaining: number of bytes remaining in attribute stream
+ *
+ * Returns non-zero if a complete attribute header fits into @remaining
+ * and the length announced in the header is at least the header size
+ * and at most @remaining.
+ *
+ * @remaining may be negative: nla_next() subtracts the aligned attribute
+ * length, which can exceed the bytes that were left. The header-size
+ * comparison is therefore done in signed arithmetic; comparing directly
+ * against sizeof() would convert a negative @remaining to a huge
+ * unsigned value and wrongly accept it.
+ */
+static inline int nla_ok(const struct nlattr *nla, int remaining)
+{
+       return remaining >= (int) sizeof(*nla) &&
+              nla->nla_len >= sizeof(*nla) &&
+              nla->nla_len <= remaining;
+}
+
+/**
+ * nla_next - next netlink attribute in attribute stream
+ * @nla: netlink attribute
+ * @remaining: number of bytes remaining in attribute stream
+ *
+ * Advances past the current attribute by its NLA_ALIGN()ed length,
+ * subtracts that length from *@remaining, and returns a pointer to
+ * the position that follows.
+ */
+static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+       int step = NLA_ALIGN(nla->nla_len);
+       char *cursor = (char *) nla;
+
+       *remaining = *remaining - step;
+
+       return (struct nlattr *) (cursor + step);
+}
+
+/**
+ * nla_parse_nested - parse the attributes nested inside a container
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @nla: attribute containing the nested attributes
+ * @policy: validation policy
+ *
+ * Runs nla_parse() over the payload of @nla. See nla_parse().
+ */
+static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
+                                  struct nlattr *nla,
+                                  struct nla_policy *policy)
+{
+       void *nested = nla_data(nla);
+       int nested_len = nla_len(nla);
+
+       return nla_parse(tb, maxtype, nested, nested_len, policy);
+}
+/**
+ * nla_put_u8 - Add a u8 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ *
+ * Returns the result of nla_put().
+ */
+static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
+{
+       int err = nla_put(skb, attrtype, sizeof(value), &value);
+
+       return err;
+}
+
+/**
+ * nla_put_u16 - Add a u16 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ *
+ * Returns the result of nla_put().
+ */
+static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
+{
+       int err = nla_put(skb, attrtype, sizeof(value), &value);
+
+       return err;
+}
+
+/**
+ * nla_put_u32 - Add a u32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ *
+ * Returns the result of nla_put().
+ */
+static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
+{
+       int err = nla_put(skb, attrtype, sizeof(value), &value);
+
+       return err;
+}
+
+/**
+ * nla_put_u64 - Add a u64 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ *
+ * Returns the result of nla_put().
+ */
+static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value)
+{
+       int err = nla_put(skb, attrtype, sizeof(value), &value);
+
+       return err;
+}
+
+/**
+ * nla_put_string - Add a string netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @str: NUL terminated string
+ *
+ * The attribute payload is the string including its terminating NUL
+ * byte. Returns the result of nla_put().
+ */
+static inline int nla_put_string(struct sk_buff *skb, int attrtype,
+                                const char *str)
+{
+       /* +1 so that the terminating NUL is part of the payload */
+       size_t payload = strlen(str) + 1;
+
+       return nla_put(skb, attrtype, payload, str);
+}
+
+/**
+ * nla_put_flag - Add a flag netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ *
+ * A flag attribute is added with a zero-length payload and no data.
+ * Returns the result of nla_put().
+ */
+static inline int nla_put_flag(struct sk_buff *skb, int attrtype)
+{
+       int err;
+
+       err = nla_put(skb, attrtype, 0, NULL);
+
+       return err;
+}
+
+/**
+ * nla_put_msecs - Add a msecs netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @jiffies: number of msecs in jiffies
+ *
+ * Converts @jiffies with jiffies_to_msecs() and stores the result as
+ * a u64 payload. Returns the result of nla_put().
+ */
+static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
+                               unsigned long jiffies)
+{
+       u64 msecs = jiffies_to_msecs(jiffies);
+
+       return nla_put(skb, attrtype, sizeof(msecs), &msecs);
+}
+
+/*
+ * NLA_PUT* - attribute construction macros with built-in error handling
+ *
+ * Every macro below ends up in NLA_PUT(), which calls nla_put() and, if
+ * that returns a negative value, jumps to a label named nla_put_failure.
+ * The function using these macros must therefore define that label.
+ */
+#define NLA_PUT(skb, attrtype, attrlen, data) \
+       do { \
+               if (nla_put(skb, attrtype, attrlen, data) < 0) \
+                       goto nla_put_failure; \
+       } while(0)
+
+/* Copies value into a temporary of the given type and adds that temporary. */
+#define NLA_PUT_TYPE(skb, type, attrtype, value) \
+       do { \
+               type __tmp = value; \
+               NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \
+       } while(0)
+
+#define NLA_PUT_U8(skb, attrtype, value) \
+       NLA_PUT_TYPE(skb, u8, attrtype, value)
+
+#define NLA_PUT_U16(skb, attrtype, value) \
+       NLA_PUT_TYPE(skb, u16, attrtype, value)
+
+#define NLA_PUT_U32(skb, attrtype, value) \
+       NLA_PUT_TYPE(skb, u32, attrtype, value)
+
+#define NLA_PUT_U64(skb, attrtype, value) \
+       NLA_PUT_TYPE(skb, u64, attrtype, value)
+
+/* Payload is the string plus its terminating NUL; value is expanded twice. */
+#define NLA_PUT_STRING(skb, attrtype, value) \
+       NLA_PUT(skb, attrtype, strlen(value) + 1, value)
+
+/* NOTE(review): the value argument is accepted but never used. */
+#define NLA_PUT_FLAG(skb, attrtype, value) \
+       NLA_PUT(skb, attrtype, 0, NULL)
+
+/* Converts jiffies with jiffies_to_msecs() and adds the result as a u64. */
+#define NLA_PUT_MSECS(skb, attrtype, jiffies) \
+       NLA_PUT_U64(skb, attrtype, jiffies_to_msecs(jiffies))
+
+/**
+ * nla_get_u32 - read the payload of a u32 attribute
+ * @nla: u32 netlink attribute
+ *
+ * Returns the first four payload bytes interpreted as u32.
+ */
+static inline u32 nla_get_u32(struct nlattr *nla)
+{
+       u32 *payload = (u32 *) nla_data(nla);
+
+       return *payload;
+}
+
+/**
+ * nla_get_u16 - read the payload of a u16 attribute
+ * @nla: u16 netlink attribute
+ *
+ * Returns the first two payload bytes interpreted as u16.
+ */
+static inline u16 nla_get_u16(struct nlattr *nla)
+{
+       u16 *payload = (u16 *) nla_data(nla);
+
+       return *payload;
+}
+
+/**
+ * nla_get_u8 - read the payload of a u8 attribute
+ * @nla: u8 netlink attribute
+ *
+ * Returns the first payload byte.
+ */
+static inline u8 nla_get_u8(struct nlattr *nla)
+{
+       u8 *payload = (u8 *) nla_data(nla);
+
+       return *payload;
+}
+
+/**
+ * nla_get_u64 - read the payload of a u64 attribute
+ * @nla: u64 netlink attribute
+ *
+ * The payload is copied into a local with nla_memcpy() instead of being
+ * dereferenced in place (presumably because the payload is not
+ * guaranteed to be 64-bit aligned - confirm against nla_memcpy()).
+ */
+static inline u64 nla_get_u64(struct nlattr *nla)
+{
+       u64 value;
+
+       nla_memcpy(&value, nla, sizeof(value));
+       return value;
+}
+
+/**
+ * nla_get_flag - test whether a flag attribute is present
+ * @nla: flag netlink attribute
+ *
+ * Returns 1 if @nla is a non-NULL pointer, 0 otherwise.
+ */
+static inline int nla_get_flag(struct nlattr *nla)
+{
+       return nla ? 1 : 0;
+}
+
+/**
+ * nla_get_msecs - read the payload of a msecs attribute
+ * @nla: msecs netlink attribute
+ *
+ * Reads the u64 millisecond payload, narrows it to unsigned long and
+ * returns the corresponding number of jiffies.
+ */
+static inline unsigned long nla_get_msecs(struct nlattr *nla)
+{
+       return msecs_to_jiffies((unsigned long) nla_get_u64(nla));
+}
+
+/**
+ * nla_nest_start - open a new level of nested attributes
+ * @skb: socket buffer to add attributes to
+ * @attrtype: attribute type of container
+ *
+ * Adds an empty attribute of type @attrtype at the current tail of
+ * @skb. Returns a pointer to that container attribute, or NULL if
+ * nla_put() failed.
+ */
+static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
+{
+       /* The container header is written where the tail is right now. */
+       struct nlattr *container = (struct nlattr *) skb->tail;
+       int err = nla_put(skb, attrtype, 0, NULL);
+
+       return err < 0 ? NULL : container;
+}
+
+/**
+ * nla_nest_end - close a level of nested attributes
+ * @skb: socket buffer the attributes are stored in
+ * @start: container attribute
+ *
+ * Rewrites the length of the container attribute so that it covers
+ * everything between @start and the current tail of @skb, i.e. all
+ * attributes appended since nla_nest_start().
+ *
+ * Returns the total data length of the skb.
+ */
+static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
+{
+       unsigned char *container = (unsigned char *) start;
+
+       start->nla_len = skb->tail - container;
+
+       return skb->len;
+}
+
+/**
+ * nla_nest_cancel - abandon a level of nested attributes
+ * @skb: socket buffer the message is stored in
+ * @start: container attribute
+ *
+ * If @start is not NULL, trims @skb back to the offset at which the
+ * container attribute begins, dropping the container and everything
+ * added after it. Always returns -1.
+ */
+static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
+{
+       if (!start)
+               return -1;
+
+       skb_trim(skb, (unsigned char *) start - skb->data);
+       return -1;
+}
+
+/**
+ * nla_for_each_attr - iterate over a stream of attributes
+ * @pos: loop cursor, assigned the current attribute on every iteration
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ *
+ * Expands to a for statement: @head and @len are evaluated once in the
+ * initializer, while @pos and @rem are evaluated repeatedly and must be
+ * assignable. The loop continues as long as nla_ok(pos, rem) holds and
+ * advances with nla_next(), which is handed the address of @rem.
+ */
+#define nla_for_each_attr(pos, head, len, rem) \
+       for (pos = head, rem = len; \
+            nla_ok(pos, rem); \
+            pos = nla_next(pos, &(rem)))
+
+#endif
index 9bed7569ce3f30b7f13f522f973aa45e73a2840f..8700379685e0d7b1b7c74d6d6e5a5e46f7faed85 100644 (file)
@@ -49,6 +49,7 @@
 #include <net/udp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
+#include <net/netlink.h>
 
 DECLARE_MUTEX(rtnl_sem);
 
@@ -462,11 +463,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
        netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
 }
 
-static int rtnetlink_done(struct netlink_callback *cb)
-{
-       return 0;
-}
-
 /* Protected by RTNL sempahore.  */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -524,8 +520,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
        }
 
        if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
-               u32 rlen;
-
                if (link->dumpit == NULL)
                        link = &(rtnetlink_links[PF_UNSPEC][type]);
 
@@ -533,14 +527,11 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
                        goto err_inval;
 
                if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-                                               link->dumpit,
-                                               rtnetlink_done)) != 0) {
+                                               link->dumpit, NULL)) != 0) {
                        return -1;
                }
-               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-               if (rlen > skb->len)
-                       rlen = skb->len;
-               skb_pull(skb, rlen);
+
+               netlink_queue_skip(nlh, skb);
                return -1;
        }
 
@@ -579,75 +570,13 @@ err_inval:
        return -1;
 }
 
-/* 
- * Process one packet of messages.
- * Malformed skbs with wrong lengths of messages are discarded silently.
- */
-
-static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
-{
-       int err;
-       struct nlmsghdr * nlh;
-
-       while (skb->len >= NLMSG_SPACE(0)) {
-               u32 rlen;
-
-               nlh = (struct nlmsghdr *)skb->data;
-               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-                       return 0;
-               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-               if (rlen > skb->len)
-                       rlen = skb->len;
-               if (rtnetlink_rcv_msg(skb, nlh, &err)) {
-                       /* Not error, but we must interrupt processing here:
-                        *   Note, that in this case we do not pull message
-                        *   from skb, it will be processed later.
-                        */
-                       if (err == 0)
-                               return -1;
-                       netlink_ack(skb, nlh, err);
-               } else if (nlh->nlmsg_flags&NLM_F_ACK)
-                       netlink_ack(skb, nlh, 0);
-               skb_pull(skb, rlen);
-       }
-
-       return 0;
-}
-
-/*
- *  rtnetlink input queue processing routine:
- *     - process as much as there was in the queue upon entry.
- *     - feed skbs to rtnetlink_rcv_skb, until it refuse a message,
- *       that will occur, when a dump started.
- */
-
 static void rtnetlink_rcv(struct sock *sk, int len)
 {
-       unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+       unsigned int qlen = 0;
 
        do {
-               struct sk_buff *skb;
-
                rtnl_lock();
-
-               if (qlen > skb_queue_len(&sk->sk_receive_queue))
-                       qlen = skb_queue_len(&sk->sk_receive_queue);
-
-               for (; qlen; qlen--) {
-                       skb = skb_dequeue(&sk->sk_receive_queue);
-                       if (rtnetlink_rcv_skb(skb)) {
-                               if (skb->len)
-                                       skb_queue_head(&sk->sk_receive_queue,
-                                                      skb);
-                               else {
-                                       kfree_skb(skb);
-                                       qlen--;
-                               }
-                               break;
-                       }
-                       kfree_skb(skb);
-               }
-
+               netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
                up(&rtnl_sem);
 
                netdev_run_todo();
index 95501e40100e72f986ca5208cdabe9be01feeb68..b7d13a4fff48568df05b7bea20c94e31869761ce 100644 (file)
@@ -336,6 +336,9 @@ void __kfree_skb(struct sk_buff *skb)
        }
 #ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
 #endif
@@ -414,9 +417,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
        C(nfct);
        nf_conntrack_get(skb->nfct);
        C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        C(ipvs_property);
 #endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        C(nf_bridge);
        nf_bridge_get(skb->nf_bridge);
@@ -474,6 +485,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
        new->nfct       = old->nfct;
        nf_conntrack_get(old->nfct);
        new->nfctinfo   = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       new->nfct_reasm = old->nfct_reasm;
+       nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        new->ipvs_property = old->ipvs_property;
 #endif
index 71f3c7350c6e80cc778954acc6dc9b589ecee5f3..39061ed53cfdd2471f2293a2e44c8a76109f478d 100644 (file)
@@ -724,12 +724,6 @@ done:
        return skb->len;
 }
 
-static int inet_diag_dump_done(struct netlink_callback *cb)
-{
-       return 0;
-}
-
-
 static __inline__ int
 inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
@@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                goto err_inval;
                }
                return netlink_dump_start(idiagnl, skb, nlh,
-                                         inet_diag_dump,
-                                         inet_diag_dump_done);
+                                         inet_diag_dump, NULL);
        } else {
                return inet_diag_get_exact(skb, nlh);
        }
index 7d917e4ce1d9761d15b9917f78f35b8ce9ec4797..9d3c8b5f327e8f4c546ae00cfd04e20870439a63 100644 (file)
@@ -5,6 +5,20 @@
 menu "IP: Netfilter Configuration"
        depends on INET && NETFILTER
 
+config NF_CONNTRACK_IPV4
+       tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && NF_CONNTRACK
+       ---help---
+         Connection tracking keeps a record of what packets have passed
+         through your machine, in order to figure out how they are related
+         into connections.
+
+         This is IPv4 support on Layer 3 independent connection tracking.
+         Layer 3 independent connection tracking is an experimental scheme
+         which generalizes ip_conntrack to support other layer 3 protocols.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 # connection tracking, helpers and protocols
 config IP_NF_CONNTRACK
        tristate "Connection tracking (required for masq/NAT)"
@@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE
        tristate "Packet type match support"
        depends on IP_NF_IPTABLES
        help
-         Packet type matching allows you to match a packet by
-         its "class", eg. BROADCAST, MULTICAST, ...
+         Packet type matching allows you to match a packet by
+         its "class", eg. BROADCAST, MULTICAST, ...
 
          Typical usage:
          iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
@@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS
 
 config IP_NF_MATCH_HELPER
        tristate "Helper match support"
-       depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+       depends on IP_NF_IPTABLES
+       depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
        help
          Helper matching allows you to match packets in dynamic connections
          tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER
 
 config IP_NF_MATCH_STATE
        tristate "Connection state match support"
-       depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+       depends on IP_NF_IPTABLES
+       depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
        help
          Connection state matching allows you to match packets based on their
          relationship to a tracked connection (ie. previous packets).  This
@@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE
 
 config IP_NF_MATCH_CONNTRACK
        tristate "Connection tracking match support"
-       depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+       depends on IP_NF_IPTABLES
+       depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
        help
          This is a general conntrack match module, a superset of the state match.
 
@@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT
 
 config IP_NF_MATCH_CONNMARK
        tristate  'Connection mark match support'
-       depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
+       depends on IP_NF_IPTABLES
+       depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
        help
          This option adds a `connmark' match, which allows you to match the
          connection mark value previously set for the session by `CONNMARK'. 
@@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK
 
 config IP_NF_MATCH_CONNBYTES
        tristate  'Connection byte/packet counter match support'
-       depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
+       depends on IP_NF_IPTABLES
+       depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
        help
          This option adds a `connbytes' match, which allows you to match the
          number of bytes and/or packets for each direction within a connection.
@@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL
 
 config IP_NF_TARGET_CONNMARK
        tristate  'CONNMARK target support'
-       depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
+       depends on IP_NF_MANGLE
+       depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
        help
          This option adds a `CONNMARK' target, which allows one to manipulate
          the connection mark value.  Similar to the MARK target, but
@@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK
 
 config IP_NF_TARGET_CLUSTERIP
        tristate "CLUSTERIP target support (EXPERIMENTAL)"
-       depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
+       depends on IP_NF_IPTABLES && EXPERIMENTAL
+       depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
        help
          The CLUSTERIP target allows you to build load-balancing clusters of
          network servers without having a dedicated load-balancing
@@ -782,7 +803,7 @@ config IP_NF_RAW
 config IP_NF_TARGET_NOTRACK
        tristate  'NOTRACK target support'
        depends on IP_NF_RAW
-       depends on IP_NF_CONNTRACK
+       depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
        help
          The NOTRACK target allows a select rule to specify
          which packets *not* to enter the conntrack/NAT
index dab4b58dd31ee6b69e99e5bef48d7c3de9f77224..058c48e258fc560f9a08fc422f280a10bc49cdd1 100644 (file)
@@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
 
 obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-objs  :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
index 5c1c0a3d1c4bc5ba2a72230f1c53d79d4f8fcb89..d2a4fec228626431ac5e086d46fb2a9afeed9932 100644 (file)
@@ -1376,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
                                ip_conntrack_expect_put(exp);
                        }
                }
-               write_unlock(&ip_conntrack_lock);
+               write_unlock_bh(&ip_conntrack_lock);
        } else {
                /* This basically means we have to flush everything*/
                write_lock_bh(&ip_conntrack_lock);
index 9bcb398fbc1fc02dcfb7f2f6e0f039e8a238d1a0..45c52d8f4d99750c0407b8d8cd3510792e85ef3f 100644 (file)
@@ -29,7 +29,7 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -316,14 +316,14 @@ target(struct sk_buff **pskb,
 {
        const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
        enum ip_conntrack_info ctinfo;
-       struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
-       u_int32_t hash;
+       u_int32_t *mark, hash;
 
        /* don't need to clusterip_config_get() here, since refcount
         * is only decremented by destroy() - and ip_tables guarantees
         * that the ->target() function isn't called after ->destroy() */
 
-       if (!ct) {
+       mark = nf_ct_get_mark((*pskb), &ctinfo);
+       if (mark == NULL) {
                printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
                        /* FIXME: need to drop invalid ones, since replies
                         * to outgoing connections of other nodes will be 
@@ -346,7 +346,7 @@ target(struct sk_buff **pskb,
 
        switch (ctinfo) {
                case IP_CT_NEW:
-                       ct->mark = hash;
+                       *mark = hash;
                        break;
                case IP_CT_RELATED:
                case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -363,7 +363,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
        DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-       DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
+       DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
        if (!clusterip_responsible(cipinfo->config, hash)) {
                DEBUGP("not responsible\n");
                return NF_DROP;
index 05d66ab59424fc0bf0ae2f6729d6bb10ac6b52ab..8acac5a40a92d4bca1efb405550f2ae3172708e3 100644 (file)
@@ -29,7 +29,7 @@ MODULE_LICENSE("GPL");
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CONNMARK.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -43,24 +43,24 @@ target(struct sk_buff **pskb,
        u_int32_t diff;
        u_int32_t nfmark;
        u_int32_t newmark;
+       u_int32_t ctinfo;
+       u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
-       enum ip_conntrack_info ctinfo;
-       struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
-       if (ct) {
+       if (ctmark) {
            switch(markinfo->mode) {
            case IPT_CONNMARK_SET:
-               newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
-               if (newmark != ct->mark)
-                   ct->mark = newmark;
+               newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+               if (newmark != *ctmark)
+                   *ctmark = newmark;
                break;
            case IPT_CONNMARK_SAVE:
-               newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-               if (ct->mark != newmark)
-                   ct->mark = newmark;
+               newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
+               if (*ctmark != newmark)
+                   *ctmark = newmark;
                break;
            case IPT_CONNMARK_RESTORE:
                nfmark = (*pskb)->nfmark;
-               diff = (ct->mark ^ nfmark) & markinfo->mask;
+               diff = (*ctmark ^ nfmark) & markinfo->mask;
                if (diff != 0)
                    (*pskb)->nfmark = nfmark ^ diff;
                break;
index a4bb9b3bc292390d2a8f77511bf96a59c689e12a..e3c69d072c6e4af064ce7c413974a08af17a6e00 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/skbuff.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -23,7 +23,7 @@ target(struct sk_buff **pskb,
           If there is a real ct entry correspondig to this packet, 
           it'll hang aroun till timing out. We don't deal with it
           for performance reasons. JK */
-       (*pskb)->nfct = &ip_conntrack_untracked.ct_general;
+       nf_ct_untrack(*pskb);
        (*pskb)->nfctinfo = IP_CT_NEW;
        nf_conntrack_get((*pskb)->nfct);
 
index df4a42c6da22285a629395c16b5f6e661e2fb5bd..d68a048b7176f7bfba6789add18447b3f513eac7 100644 (file)
@@ -10,7 +10,7 @@
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_connbytes.h>
 
@@ -46,60 +46,59 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
        const struct ipt_connbytes_info *sinfo = matchinfo;
-       enum ip_conntrack_info ctinfo;
-       struct ip_conntrack *ct;
        u_int64_t what = 0;     /* initialize to make gcc happy */
+       const struct ip_conntrack_counter *counters;
 
-       if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
+       if (!(counters = nf_ct_get_counters(skb)))
                return 0; /* no match */
 
        switch (sinfo->what) {
        case IPT_CONNBYTES_PKTS:
                switch (sinfo->direction) {
                case IPT_CONNBYTES_DIR_ORIGINAL:
-                       what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
+                       what = counters[IP_CT_DIR_ORIGINAL].packets;
                        break;
                case IPT_CONNBYTES_DIR_REPLY:
-                       what = ct->counters[IP_CT_DIR_REPLY].packets;
+                       what = counters[IP_CT_DIR_REPLY].packets;
                        break;
                case IPT_CONNBYTES_DIR_BOTH:
-                       what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
-                       what += ct->counters[IP_CT_DIR_REPLY].packets;
+                       what = counters[IP_CT_DIR_ORIGINAL].packets;
+                       what += counters[IP_CT_DIR_REPLY].packets;
                        break;
                }
                break;
        case IPT_CONNBYTES_BYTES:
                switch (sinfo->direction) {
                case IPT_CONNBYTES_DIR_ORIGINAL:
-                       what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
+                       what = counters[IP_CT_DIR_ORIGINAL].bytes;
                        break;
                case IPT_CONNBYTES_DIR_REPLY:
-                       what = ct->counters[IP_CT_DIR_REPLY].bytes;
+                       what = counters[IP_CT_DIR_REPLY].bytes;
                        break;
                case IPT_CONNBYTES_DIR_BOTH:
-                       what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
-                       what += ct->counters[IP_CT_DIR_REPLY].bytes;
+                       what = counters[IP_CT_DIR_ORIGINAL].bytes;
+                       what += counters[IP_CT_DIR_REPLY].bytes;
                        break;
                }
                break;
        case IPT_CONNBYTES_AVGPKT:
                switch (sinfo->direction) {
                case IPT_CONNBYTES_DIR_ORIGINAL:
-                       what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
-                                       ct->counters[IP_CT_DIR_ORIGINAL].packets);
+                       what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
+                                       counters[IP_CT_DIR_ORIGINAL].packets);
                        break;
                case IPT_CONNBYTES_DIR_REPLY:
-                       what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
-                                       ct->counters[IP_CT_DIR_REPLY].packets);
+                       what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
+                                       counters[IP_CT_DIR_REPLY].packets);
                        break;
                case IPT_CONNBYTES_DIR_BOTH:
                        {
                                u_int64_t bytes;
                                u_int64_t pkts;
-                               bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
-                                       ct->counters[IP_CT_DIR_REPLY].bytes;
-                               pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
-                                       ct->counters[IP_CT_DIR_REPLY].packets;
+                               bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
+                                       counters[IP_CT_DIR_REPLY].bytes;
+                               pkts = counters[IP_CT_DIR_ORIGINAL].packets+
+                                       counters[IP_CT_DIR_REPLY].packets;
 
                                /* FIXME_THEORETICAL: what to do if sum
                                 * overflows ? */
index bf8de47ce0041487822543d275523541e4023dc1..5306ef293b92ff39f6cbd8cbd39dad713848dd63 100644 (file)
@@ -28,7 +28,7 @@ MODULE_LICENSE("GPL");
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_connmark.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static int
 match(const struct sk_buff *skb,
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
        const struct ipt_connmark_info *info = matchinfo;
-       enum ip_conntrack_info ctinfo;
-       struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-       if (!ct)
+       u_int32_t ctinfo;
+       const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
+       if (!ctmark)
                return 0;
 
-       return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+       return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
 }
 
 static int
index c1d22801b7cf97247955b543a482e0427ed16b71..c8d18705469b987df766797dd33f86372cfa3d88 100644 (file)
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 #include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_conntrack.h>
 
@@ -18,6 +25,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -102,6 +111,93 @@ match(const struct sk_buff *skb,
        return 1;
 }
 
+#else /* CONFIG_IP_NF_CONNTRACK */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+       const struct ipt_conntrack_info *sinfo = matchinfo;
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+       unsigned int statebit;
+       /*
+        * Variant of match() compiled when ip_conntrack is not configured;
+        * the connection is obtained through nf_ct_get().  Every test that
+        * is selected in sinfo->flags must pass for the function to return
+        * 1; the first failing test returns 0.
+        */
+       ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+       /* FWINV: XOR a test result with its inversion bit in sinfo->invflags. */
+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+       /* Classify the packet: untracked, tracked (from ctinfo) or invalid. */
+       if (ct == &nf_conntrack_untracked)
+               statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+       else if (ct)
+               statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+       else
+               statebit = IPT_CONNTRACK_STATE_INVALID;
+       if(sinfo->flags & IPT_CONNTRACK_STATE) {
+               if (ct) { /* reply tuple not mirroring the original one => SNAT/DNAT bit */
+                       if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
+                           ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+                               statebit |= IPT_CONNTRACK_STATE_SNAT;
+
+                       if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
+                           ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+                               statebit |= IPT_CONNTRACK_STATE_DNAT;
+               }
+
+               if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+                       return 0;
+       }
+       /* Every test below fails outright when no conntrack is attached. */
+       if(sinfo->flags & IPT_CONNTRACK_PROTO) {
+               if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
+               if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
+               if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
+               if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
+               if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_STATUS) {
+               if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+                       return 0;
+       }
+
+       if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+               unsigned long expires;
+
+               if(!ct)
+                       return 0;
+               /* Remaining lifetime in seconds; 0 once the timer is no longer pending. */
+               expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+
+               if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+                       return 0;
+       }
+
+       return 1;
+}
+
+#endif /* CONFIG_IP_NF_CONNTRACK */
+
 static int check(const char *tablename,
                 const struct ipt_ip *ip,
                 void *matchinfo,
index 3e7dd014de4363c8fda5b08cc11ee6d3925b75fa..bf14e1c7798a2299dad0bad987a2ae37ab569280 100644 (file)
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#endif
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_helper.h>
 
@@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module");
 #define DEBUGP(format, args...)
 #endif
 
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -73,6 +80,53 @@ out_unlock:
        return ret;
 }
 
+#else /* CONFIG_IP_NF_CONNTRACK */
+
+/*
+ * Helper match for kernels built on nf_conntrack (this arm is compiled
+ * when ip_conntrack is not configured).
+ *
+ * @skb:       packet being matched; its conntrack entry is looked up with
+ *             nf_ct_get().
+ * @matchinfo: struct ipt_helper_info carrying the helper name to compare
+ *             against and the invert flag.
+ * @in, @out, @offset, @hotdrop: not used by this function.
+ *
+ * Returns info->invert when the packet has no conntrack, the conntrack has
+ * no master, or the master has no helper.  Otherwise the result is
+ * info->invert XOR "name matches", where an empty info->name matches any
+ * helper and a non-empty one is compared over strlen(helper name) bytes.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+       const struct ipt_helper_info *info = matchinfo;
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+       int ret = info->invert;
+
+       ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+       if (!ct) {
+               DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+               return ret;
+       }
+
+       if (!ct->master) {
+               DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+               return ret;
+       }
+
+       /* The master's helper pointer is read under nf_conntrack_lock. */
+       read_lock_bh(&nf_conntrack_lock);
+       if (!ct->master->helper) {
+               /* Print the master conntrack; no "exp" exists in this function. */
+               DEBUGP("ipt_helper: master ct %p has no helper\n",
+                       ct->master);
+               goto out_unlock;
+       }
+
+       DEBUGP("master's name = %s , info->name = %s\n",
+               ct->master->helper->name, info->name);
+
+       if (info->name[0] == '\0')
+               ret ^= 1;
+       else
+               ret ^= !strncmp(ct->master->helper->name, info->name,
+                               strlen(ct->master->helper->name));
+out_unlock:
+       read_unlock_bh(&nf_conntrack_lock);
+       return ret;
+}
+#endif
+
 static int check(const char *tablename,
                 const struct ipt_ip *ip,
                 void *matchinfo,
index b1511b97ea5f9c674452ee7c891b3a6eab7b4790..4d7f16b70cec68b3c9cfc3c2e56cd11ebacef548 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_state.h>
 
@@ -30,9 +30,9 @@ match(const struct sk_buff *skb,
        enum ip_conntrack_info ctinfo;
        unsigned int statebit;
 
-       if (skb->nfct == &ip_conntrack_untracked.ct_general)
+       if (nf_ct_is_untracked(skb))
                statebit = IPT_STATE_UNTRACKED;
-       else if (!ip_conntrack_get(skb, &ctinfo))
+       else if (!nf_ct_get_ctinfo(skb, &ctinfo))
                statebit = IPT_STATE_INVALID;
        else
                statebit = IPT_STATE_BIT(ctinfo);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644 (file)
index 0000000..8202c1c
--- /dev/null
@@ -0,0 +1,571 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - move L3 protocol dependent part to this file.
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - add get_features() to support various size of conntrack
+ *       structures.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
+/*
+ * Fill the layer-3 part of @tuple from the IPv4 header located at offset
+ * @nhoff in @skb.  Two consecutive 32-bit words are read starting at the
+ * saddr field: the first becomes src.u3.ip, the second dst.u3.ip.
+ *
+ * Returns 1 on success, 0 when skb_header_pointer() cannot supply the
+ * address bytes.
+ */
+static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+                            struct nf_conntrack_tuple *tuple)
+{
+       u_int32_t _addrs[2], *ap;
+       ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
+                               sizeof(u_int32_t) * 2, _addrs);
+       if (ap == NULL)
+               return 0;
+
+       tuple->src.u3.ip = ap[0];
+       tuple->dst.u3.ip = ap[1];
+
+       return 1;
+}
+/*
+ * Write into @tuple the IPv4 addresses of @orig with source and
+ * destination swapped.  Only the u3.ip members are touched.  Always
+ * returns 1.
+ */
+static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+                          const struct nf_conntrack_tuple *orig)
+{
+       tuple->src.u3.ip = orig->dst.u3.ip;
+       tuple->dst.u3.ip = orig->src.u3.ip;
+
+       return 1;
+}
+/*
+ * Print the source and destination IPv4 addresses of @tuple to @s in
+ * dotted-quad form ("src=a.b.c.d dst=a.b.c.d ").  Returns whatever
+ * seq_printf() returns.
+ */
+static int ipv4_print_tuple(struct seq_file *s,
+                           const struct nf_conntrack_tuple *tuple)
+{
+       return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+                         NIPQUAD(tuple->src.u3.ip),
+                         NIPQUAD(tuple->dst.u3.ip));
+}
+/*
+ * print_conntrack callback for IPv4: prints nothing to @s and always
+ * returns 0.
+ */
+static int ipv4_print_conntrack(struct seq_file *s,
+                               const struct nf_conn *conntrack)
+{
+       return 0;
+}
+
+/* Returns new sk_buff, or NULL.
+ *
+ * Orphans @skb, then passes it to ip_defrag() for defrag user @user with
+ * bottom halves disabled.  When ip_defrag() hands back a packet, its IP
+ * header checksum is recomputed with ip_send_check() before returning. */
+static struct sk_buff *
+nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+       skb_orphan(skb);
+
+        local_bh_disable();
+        skb = ip_defrag(skb, user);
+        local_bh_enable();
+
+        if (skb)
+               ip_send_check(skb->nh.iph);
+
+        return skb;
+}
+/*
+ * prepare callback for IPv4.
+ *
+ * A packet whose header still carries a non-zero fragment offset is
+ * refused with -NF_DROP after a rate-limited log message.  Otherwise
+ * *dataoff receives the offset, relative to skb->data, of the first byte
+ * after the IP header (header length taken from ihl), *protonum receives
+ * the IP protocol field, and NF_ACCEPT is returned.
+ */
+static int
+ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
+            u_int8_t *protonum)
+{
+       /* Never happen */
+       if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+               if (net_ratelimit()) {
+                       printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
+                       (*pskb)->nh.iph->protocol, hooknum);
+               }
+               return -NF_DROP;
+       }
+
+       *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
+       *protonum = (*pskb)->nh.iph->protocol;
+
+       return NF_ACCEPT;
+}
+
+/* Flag read by ipv4_get_features(); non-static, presumably set by the NAT
+ * code -- TODO confirm against its users. */
+int nat_module_is_loaded = 0;
+
+/*
+ * get_features callback for IPv4: reports NF_CT_F_NAT while
+ * nat_module_is_loaded is non-zero and NF_CT_F_BASIC otherwise.  @tuple is
+ * not inspected.
+ */
+static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
+{
+       return nat_module_is_loaded ? NF_CT_F_NAT : NF_CT_F_BASIC;
+}
+/*
+ * Netfilter hook function: returns the verdict of nf_conntrack_confirm()
+ * for the packet.  @hooknum, @in, @out and @okfn are not used.
+ */
+static unsigned int ipv4_confirm(unsigned int hooknum,
+                                struct sk_buff **pskb,
+                                const struct net_device *in,
+                                const struct net_device *out,
+                                int (*okfn)(struct sk_buff *))
+{
+       /* We've seen it coming out the other side: confirm it */
+       return nf_conntrack_confirm(pskb);
+}
+
+/*
+ * Netfilter hook function that runs the connection's helper, if any.
+ *
+ * The helper is given the offset, relative to skb->data, of the data that
+ * follows the IP header, together with the conntrack entry and its ctinfo.
+ * Packets with no conntrack entry or no helper are accepted; otherwise the
+ * helper's verdict is returned unchanged.
+ */
+static unsigned int ipv4_conntrack_help(unsigned int hooknum,
+                                     struct sk_buff **pskb,
+                                     const struct net_device *in,
+                                     const struct net_device *out,
+                                     int (*okfn)(struct sk_buff *))
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+
+       /* Nothing to do unless the connection has a helper attached. */
+       if (!ct || !ct->helper)
+               return NF_ACCEPT;
+
+       /* This is where we call the helper: as the packet goes out. */
+       return ct->helper->help(pskb,
+                               (*pskb)->nh.raw - (*pskb)->data
+                                               + (*pskb)->nh.iph->ihl*4,
+                               ct, ctinfo);
+}
+/*
+ * Netfilter hook function that reassembles fragmented packets.
+ *
+ * When neither CONFIG_IP_NF_NAT nor CONFIG_IP_NF_NAT_MODULE is defined, a
+ * packet that already has nfct set is accepted untouched.  A packet with
+ * IP_MF or a fragment offset is passed to nf_ct_ipv4_gather_frags(), using
+ * IP_DEFRAG_CONNTRACK_IN at NF_IP_PRE_ROUTING and IP_DEFRAG_CONNTRACK_OUT
+ * at any other hook.  Returns NF_STOLEN when that call leaves *pskb NULL,
+ * NF_ACCEPT otherwise.
+ */
+static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+                                         struct sk_buff **pskb,
+                                         const struct net_device *in,
+                                         const struct net_device *out,
+                                         int (*okfn)(struct sk_buff *))
+{
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+       /* Previously seen (loopback)?  Ignore.  Do this before
+          fragment check. */
+       if ((*pskb)->nfct)
+               return NF_ACCEPT;
+#endif
+
+       /* Gather fragments. */
+       if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+               *pskb = nf_ct_ipv4_gather_frags(*pskb,
+                                               hooknum == NF_IP_PRE_ROUTING ?
+                                               IP_DEFRAG_CONNTRACK_IN :
+                                               IP_DEFRAG_CONNTRACK_OUT);
+               if (!*pskb)
+                       return NF_STOLEN;
+       }
+       return NF_ACCEPT;
+}
+/*
+ * Netfilter hook function: confirm the connection, then refragment if
+ * needed.
+ *
+ * Returns NF_DROP when ipv4_confirm() does not return NF_ACCEPT.  A packet
+ * longer than the route's MTU with no tso_size set is handed to
+ * ip_fragment() together with @okfn and NF_STOLEN is returned.  In every
+ * other case the result is NF_ACCEPT.
+ */
+static unsigned int ipv4_refrag(unsigned int hooknum,
+                               struct sk_buff **pskb,
+                               const struct net_device *in,
+                               const struct net_device *out,
+                               int (*okfn)(struct sk_buff *))
+{
+       struct rtable *rt = (struct rtable *)(*pskb)->dst;
+
+       /* We've seen it coming out the other side: confirm */
+       if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+               return NF_DROP;
+
+       /* Local packets are never produced too large for their
+          interface.  We defragment them at LOCAL_OUT, however,
+          so we have to refragment them here. */
+       if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
+           !skb_shinfo(*pskb)->tso_size) {
+               /* No hook can be after us, so this should be OK. */
+               ip_fragment(*pskb, okfn);
+               return NF_STOLEN;
+       }
+       return NF_ACCEPT;
+}
+/*
+ * Netfilter hook function: passes the packet to nf_conntrack_in() for
+ * PF_INET and returns its verdict.  @in, @out and @okfn are not used.
+ */
+static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+                                     struct sk_buff **pskb,
+                                     const struct net_device *in,
+                                     const struct net_device *out,
+                                     int (*okfn)(struct sk_buff *))
+{
+       return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+/*
+ * Netfilter hook function for locally generated packets.
+ *
+ * A packet shorter than struct iphdr, or whose ihl describes a header
+ * smaller than struct iphdr, is accepted without being tracked (with a
+ * rate-limited log message).  Everything else goes through
+ * nf_conntrack_in() for PF_INET.
+ */
+static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+                                        struct sk_buff **pskb,
+                                        const struct net_device *in,
+                                        const struct net_device *out,
+                                        int (*okfn)(struct sk_buff *))
+{
+       /* root is playing with raw sockets. */
+       if ((*pskb)->len < sizeof(struct iphdr)
+           || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+               if (net_ratelimit())
+                       printk("ipt_hook: happy cracking.\n");
+               return NF_ACCEPT;
+       }
+       return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+   make it the first hook.
+
+   Hook registrations for PF_INET defined below, listed as
+   hook point / priority / handler (NF_IP_ and NF_IP_PRI_ prefixes omitted):
+     PRE_ROUTING   CONNTRACK_DEFRAG   ipv4_conntrack_defrag
+     PRE_ROUTING   CONNTRACK          ipv4_conntrack_in
+     LOCAL_OUT     CONNTRACK_DEFRAG   ipv4_conntrack_defrag
+     LOCAL_OUT     CONNTRACK          ipv4_conntrack_local
+     POST_ROUTING  CONNTRACK_HELPER   ipv4_conntrack_help
+     LOCAL_IN      CONNTRACK_HELPER   ipv4_conntrack_help
+     POST_ROUTING  CONNTRACK_CONFIRM  ipv4_refrag
+     LOCAL_IN      CONNTRACK_CONFIRM  ipv4_confirm */
+static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
+       .hook           = ipv4_conntrack_defrag,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_PRE_ROUTING,
+       .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_in_ops = {
+       .hook           = ipv4_conntrack_in,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_PRE_ROUTING,
+       .priority       = NF_IP_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
+       .hook           = ipv4_conntrack_defrag,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_LOCAL_OUT,
+       .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
+       .hook           = ipv4_conntrack_local,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_LOCAL_OUT,
+       .priority       = NF_IP_PRI_CONNTRACK,
+};
+
+/* helpers */
+static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
+       .hook           = ipv4_conntrack_help,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_POST_ROUTING,
+       .priority       = NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
+       .hook           = ipv4_conntrack_help,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_LOCAL_IN,
+       .priority       = NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ipv4_conntrack_out_ops = {
+       .hook           = ipv4_refrag,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_POST_ROUTING,
+       .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
+       .hook           = ipv4_confirm,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET,
+       .hooknum        = NF_IP_LOCAL_IN,
+       .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+#ifdef CONFIG_SYSCTL
+/* From nf_conntrack_proto_icmp.c.
+ *
+ * The three tables below nest as "net" -> "netfilter" ->
+ * "nf_conntrack_icmp_timeout"; the leaf exposes nf_ct_icmp_timeout with
+ * mode 0644 through proc_dointvec_jiffies.
+ *
+ * NOTE(review): the variable is declared unsigned long here while .maxlen
+ * is sizeof(unsigned int) -- confirm the two agree on 64-bit builds. */
+extern unsigned long nf_ct_icmp_timeout;
+static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
+
+static ctl_table nf_ct_sysctl_table[] = {
+       {
+               .ctl_name       = NET_NF_CONNTRACK_ICMP_TIMEOUT,
+               .procname       = "nf_conntrack_icmp_timeout",
+               .data           = &nf_ct_icmp_timeout,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+        { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_netfilter_table[] = {
+       {
+               .ctl_name       = NET_NETFILTER,
+               .procname       = "netfilter",
+               .mode           = 0555,
+               .child          = nf_ct_sysctl_table,
+       },
+       { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_net_table[] = {
+       {
+               .ctl_name       = CTL_NET,
+               .procname       = "net",
+               .mode           = 0555,
+               .child          = nf_ct_netfilter_table,
+       },
+       { .ctl_name = 0 }
+};
+#endif
+
+/* Fast function for those who don't want to parse /proc (and I don't
+   blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+   mapping.
+
+   Builds a PF_INET/TCP tuple from the socket's rcv_saddr/sport and
+   daddr/dport, looks it up with nf_conntrack_find_get() and copies the
+   ORIGINAL-direction destination address and port to @user as a
+   struct sockaddr_in.  @optval is not used and *len is only read.
+
+   Returns 0 on success, -ENOPROTOOPT when the socket's protocol name is
+   not "TCP", -EINVAL when *len is smaller than struct sockaddr_in,
+   -ENOENT when no conntrack entry is found and -EFAULT when
+   copy_to_user() fails. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       struct nf_conntrack_tuple_hash *h;
+       struct nf_conntrack_tuple tuple;
+       
+       NF_CT_TUPLE_U_BLANK(&tuple);
+       tuple.src.u3.ip = inet->rcv_saddr;
+       tuple.src.u.tcp.port = inet->sport;
+       tuple.dst.u3.ip = inet->daddr;
+       tuple.dst.u.tcp.port = inet->dport;
+       tuple.src.l3num = PF_INET;
+       tuple.dst.protonum = IPPROTO_TCP;
+
+       /* We only do TCP at the moment: is there a better way? */
+       if (strcmp(sk->sk_prot->name, "TCP")) {
+               DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+               return -ENOPROTOOPT;
+       }
+
+       if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+               DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
+                      *len, sizeof(struct sockaddr_in));
+               return -EINVAL;
+       }
+
+       h = nf_conntrack_find_get(&tuple, NULL);
+       if (h) {
+               struct sockaddr_in sin;
+               struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+               sin.sin_family = AF_INET;
+               sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+                       .tuple.dst.u.tcp.port;
+               sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+                       .tuple.dst.u3.ip;
+
+               DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+                      NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+               nf_ct_put(ct); /* values were copied into sin above, so the reference can go */
+               if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+                       return -EFAULT;
+               else
+                       return 0;
+       }
+       DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+              NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
+              NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+       return -ENOENT;
+}
+/* PF_INET get-sockopt registration: the range [SO_ORIGINAL_DST,
+   SO_ORIGINAL_DST+1) is served by getorigdst(). */
+static struct nf_sockopt_ops so_getorigdst = {
+       .pf             = PF_INET,
+       .get_optmin     = SO_ORIGINAL_DST,
+       .get_optmax     = SO_ORIGINAL_DST+1,
+       .get            = &getorigdst,
+};
+/* Layer-3 protocol descriptor for PF_INET ("ipv4"), wiring the ipv4_*
+   callbacks of this file into nf_conntrack; registered from
+   init_or_cleanup(). */
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
+       .l3proto         = PF_INET,
+       .name            = "ipv4",
+       .pkt_to_tuple    = ipv4_pkt_to_tuple,
+       .invert_tuple    = ipv4_invert_tuple,
+       .print_tuple     = ipv4_print_tuple,
+       .print_conntrack = ipv4_print_conntrack,
+       .prepare         = ipv4_prepare,
+       .get_features    = ipv4_get_features,
+       .me              = THIS_MODULE,
+};
+/*
+ * Combined set-up / tear-down routine.
+ *
+ * With @init non-zero it registers, in this order: the SO_ORIGINAL_DST
+ * sockopt, the tcp4, udp4 and icmp protocol trackers, the ipv4 l3proto,
+ * the eight netfilter hooks and (under CONFIG_SYSCTL) the sysctl table;
+ * finally it points ip_ct_attach at __nf_conntrack_attach.  If a step
+ * fails, control jumps to the label that undoes everything registered so
+ * far, in reverse order, and the error is returned.
+ *
+ * With @init zero it enters at "cleanup" and runs the whole unwind chain.
+ *
+ * Returns 0 on success (and after cleanup), otherwise the failing step's
+ * negative return value, or -ENOMEM when the sysctl registration fails.
+ */
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
+static int init_or_cleanup(int init)
+{
+       int ret = 0;
+
+       if (!init) goto cleanup;
+
+       ret = nf_register_sockopt(&so_getorigdst);
+       if (ret < 0) {
+               printk(KERN_ERR "Unable to register netfilter socket option\n");
+               goto cleanup_nothing;
+       }
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register tcp.\n");
+               goto cleanup_sockopt;
+       }
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register udp.\n");
+               goto cleanup_tcp;
+       }
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register icmp.\n");
+               goto cleanup_udp;
+       }
+
+       ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register ipv4\n");
+               goto cleanup_icmp;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
+               goto cleanup_ipv4;
+       }
+       ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
+               goto cleanup_defragops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_in_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
+               goto cleanup_defraglocalops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register local out hook.\n");
+               goto cleanup_inops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register local helper hook.\n");
+               goto cleanup_inandlocalops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
+               goto cleanup_helperinops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
+               goto cleanup_helperoutops;
+       }
+
+       ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv4: can't register local in hook.\n");
+               goto cleanup_inoutandlocalops;
+       }
+
+#ifdef CONFIG_SYSCTL
+       nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+       if (nf_ct_ipv4_sysctl_header == NULL) {
+               printk("nf_conntrack: can't register to sysctl.\n");
+               ret = -ENOMEM;
+               goto cleanup_localinops;
+       }
+#endif
+
+       /* For use by REJECT target */
+       ip_ct_attach = __nf_conntrack_attach;
+
+       return ret;
+
+ cleanup: /* full tear-down; the labels below are also entry points for partial unwinds */
+       synchronize_net();
+       ip_ct_attach = NULL;
+#ifdef CONFIG_SYSCTL
+       unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
+ cleanup_localinops:
+#endif
+       nf_unregister_hook(&ipv4_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+       nf_unregister_hook(&ipv4_conntrack_out_ops);
+ cleanup_helperoutops:
+       nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
+ cleanup_helperinops:
+       nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
+ cleanup_inandlocalops:
+       nf_unregister_hook(&ipv4_conntrack_local_out_ops);
+ cleanup_inops:
+       nf_unregister_hook(&ipv4_conntrack_in_ops);
+ cleanup_defraglocalops:
+       nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+       nf_unregister_hook(&ipv4_conntrack_defrag_ops);
+ cleanup_ipv4:
+       nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ cleanup_icmp:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
+ cleanup_udp:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
+ cleanup_tcp:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ cleanup_sockopt:
+       nf_unregister_sockopt(&so_getorigdst);
+ cleanup_nothing:
+       return ret;
+}
+
+MODULE_LICENSE("GPL");
+/* Module entry point: calls need_nf_conntrack() and then performs the
+   full registration through init_or_cleanup(1), returning its result. */
+static int __init init(void)
+{
+       need_nf_conntrack();
+       return init_or_cleanup(1);
+}
+/* Module exit point: runs the tear-down path of init_or_cleanup(). */
+static void __exit fini(void)
+{
+       init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+/* Intentionally empty, exported function.  NOTE(review): presumably kept so
+   that modules calling need_ip_conntrack() resolve against this module --
+   confirm against the callers. */
+void need_ip_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644 (file)
index 0000000..7ddb5c0
--- /dev/null
@@ -0,0 +1,301 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - enable working with Layer 3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+unsigned long nf_ct_icmp_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Fill the ICMP part of @tuple (type, code and echo id) from the ICMP header
+ * located at @dataoff in @skb.  Returns 1 on success, 0 when the header
+ * cannot be read. */
+static int icmp_pkt_to_tuple(const struct sk_buff *skb,
+                            unsigned int dataoff,
+                            struct nf_conntrack_tuple *tuple)
+{
+       struct icmphdr hdrbuf;
+       const struct icmphdr *icmph;
+
+       icmph = skb_header_pointer(skb, dataoff, sizeof(hdrbuf), &hdrbuf);
+       if (!icmph)
+               return 0;
+
+       tuple->src.u.icmp.id = icmph->un.echo.id;
+       tuple->dst.u.icmp.type = icmph->type;
+       tuple->dst.u.icmp.code = icmph->code;
+
+       return 1;
+}
+
+/* Build in @tuple the ICMP part of the reply to @orig: same id and code,
+ * with the type replaced by its request/reply counterpart.  Returns 1 on
+ * success, 0 when the type of @orig has no counterpart in the table. */
+static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                            const struct nf_conntrack_tuple *orig)
+{
+       /* Entries hold (counterpart type + 1) so that 0 means "no entry". */
+       static u_int8_t invmap[] = {
+               [ICMP_ECHO]             = ICMP_ECHOREPLY + 1,
+               [ICMP_ECHOREPLY]        = ICMP_ECHO + 1,
+               [ICMP_TIMESTAMP]        = ICMP_TIMESTAMPREPLY + 1,
+               [ICMP_TIMESTAMPREPLY]   = ICMP_TIMESTAMP + 1,
+               [ICMP_INFO_REQUEST]     = ICMP_INFO_REPLY + 1,
+               [ICMP_INFO_REPLY]       = ICMP_INFO_REQUEST + 1,
+               [ICMP_ADDRESS]          = ICMP_ADDRESSREPLY + 1,
+               [ICMP_ADDRESSREPLY]     = ICMP_ADDRESS + 1,
+       };
+       unsigned int type = orig->dst.u.icmp.type;
+
+       /* Type beyond the table: nothing to invert to. */
+       if (type >= sizeof(invmap))
+               return 0;
+       /* Hole in the table: this type has no counterpart. */
+       if (invmap[type] == 0)
+               return 0;
+
+       tuple->src.u.icmp.id = orig->src.u.icmp.id;
+       tuple->dst.u.icmp.type = invmap[type] - 1;
+       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+       return 1;
+}
+
+/* Write the ICMP-specific part of @tuple (type, code, id in host byte
+ * order) to the seq_file; returns whatever seq_printf() returns. */
+static int icmp_print_tuple(struct seq_file *s,
+                           const struct nf_conntrack_tuple *tuple)
+{
+       unsigned int type = tuple->dst.u.icmp.type;
+       unsigned int code = tuple->dst.u.icmp.code;
+       unsigned int id = ntohs(tuple->src.u.icmp.id);
+
+       return seq_printf(s, "type=%u code=%u id=%u ", type, code, id);
+}
+
+/* Print out the private part of the conntrack.  Nothing is written for
+ * ICMP; the function only returns 0. */
+static int icmp_print_conntrack(struct seq_file *s,
+                               const struct nf_conn *conntrack)
+{
+       return 0;
+}
+
+/* Per-packet handling for an ICMP conntrack entry.  Returns the verdict for
+ * the packet; this implementation always returns NF_ACCEPT. */
+static int icmp_packet(struct nf_conn *ct,
+                      const struct sk_buff *skb,
+                      unsigned int dataoff,
+                      enum ip_conntrack_info ctinfo,
+                      int pf,
+                      unsigned int hooknum)
+{
+       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_REPLY) {
+               /* Not a reply: count one more outstanding request and
+                  refresh the entry with nf_ct_icmp_timeout. */
+               atomic_inc(&ct->proto.icmp.count);
+               nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+               nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
+               return NF_ACCEPT;
+       }
+
+       /* Reply: once every request has been answered, fire the timeout
+          handler right away.  The entry won't actually vanish as we still
+          have the skb, and del_timer() succeeding only once guarantees the
+          handler runs once even if the count hits zero twice
+          (theoretically possible with SMP). */
+       if (atomic_dec_and_test(&ct->proto.icmp.count)) {
+               if (del_timer(&ct->timeout))
+                       ct->timeout.function((unsigned long)ct);
+       }
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol is found.  Only the request
+ * types listed in valid_new may start an ICMP `conn'; returns 1 after
+ * resetting the outstanding-request counter, 0 for any other type. */
+static int icmp_new(struct nf_conn *conntrack,
+                   const struct sk_buff *skb, unsigned int dataoff)
+{
+       static u_int8_t valid_new[] = {
+               [ICMP_ECHO]             = 1,
+               [ICMP_TIMESTAMP]        = 1,
+               [ICMP_INFO_REQUEST]     = 1,
+               [ICMP_ADDRESS]          = 1,
+       };
+       unsigned int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type;
+
+       if (type < sizeof(valid_new) && valid_new[type]) {
+               atomic_set(&conntrack->proto.icmp.count, 0);
+               return 1;
+       }
+
+       /* Can't create a new ICMP `conn' with this. */
+       DEBUGP("icmp: can't create new conn with type %u\n",
+              conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+       NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+       return 0;
+}
+
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
+/*
+ * Relate an ICMP error to the connection of the packet quoted inside it.
+ *
+ * The IP header embedded after the ICMP header is turned into a tuple, the
+ * tuple is inverted and looked up.  On a match skb->nfct and skb->nfctinfo
+ * are set (IP_CT_RELATED, plus IP_CT_IS_REPLY depending on direction) and
+ * *ctinfo is updated accordingly.
+ *
+ * Every path returns -NF_ACCEPT, whether or not a connection was found.
+ */
+static int
+icmp_error_message(struct sk_buff *skb,
+                 enum ip_conntrack_info *ctinfo,
+                 unsigned int hooknum)
+{
+       struct nf_conntrack_tuple innertuple, origtuple;
+       struct {
+               struct icmphdr icmp;
+               struct iphdr ip;
+       } _in, *inside;
+       struct nf_conntrack_protocol *innerproto;
+       struct nf_conntrack_tuple_hash *h;
+       int dataoff;
+
+       NF_CT_ASSERT(skb->nfct == NULL);
+
+       /* Not enough header? */
+       inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+       if (inside == NULL)
+               return -NF_ACCEPT;
+
+       /* Ignore ICMP's containing fragments (shouldn't happen) */
+       if (inside->ip.frag_off & htons(IP_OFFSET)) {
+               DEBUGP("icmp_error_message: fragment of proto %u\n",
+                      inside->ip.protocol);
+               return -NF_ACCEPT;
+       }
+
+       innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+       /* Offset of the quoted IP header: outer IP header + ICMP header. */
+       dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+       /* Are they talking about one of our connections? */
+       if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
+                            inside->ip.protocol, &origtuple,
+                            &nf_conntrack_l3proto_ipv4, innerproto)) {
+               DEBUGP("icmp_error_message: ! get_tuple p=%u\n",
+                      inside->ip.protocol);
+               return -NF_ACCEPT;
+       }
+
+       /* Ordinarily, we'd expect the inverted tupleproto, but it's
+          been preserved inside the ICMP. */
+       if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+                               &nf_conntrack_l3proto_ipv4, innerproto)) {
+               DEBUGP("icmp_error_message: no match\n");
+               return -NF_ACCEPT;
+       }
+
+       *ctinfo = IP_CT_RELATED;
+
+       h = nf_conntrack_find_get(&innertuple, NULL);
+       if (!h) {
+               /* Locally generated ICMPs will match inverted if they
+                  haven't been SNAT'ed yet */
+               /* FIXME: NAT code has to handle half-done double NAT --RR */
+               if (hooknum == NF_IP_LOCAL_OUT)
+                       h = nf_conntrack_find_get(&origtuple, NULL);
+
+               if (!h) {
+                       DEBUGP("icmp_error_message: no match\n");
+                       return -NF_ACCEPT;
+               }
+
+               /* Reverse direction from that found: this entry was located
+                  with the non-inverted tuple, so the error travels in the
+                  direction opposite to the one the hash entry describes. */
+               if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+                       *ctinfo += IP_CT_IS_REPLY;
+       } else {
+               if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+                       *ctinfo += IP_CT_IS_REPLY;
+       }
+
+       /* Update skb to refer to this connection */
+       skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+       skb->nfctinfo = *ctinfo;
+       return -NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv.
+ *
+ * Sanity-checks an ICMP packet: header present, checksum (PRE_ROUTING
+ * only) and type range.  Returns -NF_ACCEPT for packets that fail a check,
+ * NF_ACCEPT for ICMP types that are not error messages, and otherwise the
+ * result of icmp_error_message(). */
+static int
+icmp_error(struct sk_buff *skb, unsigned int dataoff,
+          enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+{
+       struct icmphdr _ih, *icmph;
+
+       /* Not enough header? */
+       icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+       if (icmph == NULL) {
+               if (LOG_INVALID(IPPROTO_ICMP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmp: short packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* See ip_conntrack_proto_tcp.c */
+       if (hooknum != NF_IP_PRE_ROUTING)
+               goto checksum_skipped;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_HW:
+               /* A folded hardware checksum of zero means it verified. */
+               if (!(u16)csum_fold(skb->csum))
+                       break;
+               if (LOG_INVALID(IPPROTO_ICMP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmp: bad HW ICMP checksum ");
+               return -NF_ACCEPT;
+       case CHECKSUM_NONE:
+               /* No checksum state: compute it over the whole skb. */
+               if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+                       if (LOG_INVALID(IPPROTO_ICMP))
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                             NULL,
+                                             "nf_ct_icmp: bad ICMP checksum ");
+                       return -NF_ACCEPT;
+               }
+               /* checksum is good: fall through */
+       default:
+               break;
+       }
+
+checksum_skipped:
+       /*
+        *      18 is the highest 'known' ICMP type. Anything else is a mystery
+        *
+        *      RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+        *                discarded.
+        */
+       if (icmph->type > NR_ICMP_TYPES) {
+               if (LOG_INVALID(IPPROTO_ICMP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmp: invalid ICMP type ");
+               return -NF_ACCEPT;
+       }
+
+       /* Need to track icmp error message? */
+       if (icmph->type != ICMP_DEST_UNREACH
+           && icmph->type != ICMP_SOURCE_QUENCH
+           && icmph->type != ICMP_TIME_EXCEEDED
+           && icmph->type != ICMP_PARAMETERPROB
+           && icmph->type != ICMP_REDIRECT)
+               return NF_ACCEPT;
+
+       return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+/* Layer 4 tracker descriptor for ICMP over IPv4: binds the handlers defined
+ * in this file to (PF_INET, IPPROTO_ICMP).  Exported below. */
+struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
+{
+       .list                   = { NULL, NULL },
+       .l3proto                = PF_INET,
+       .proto                  = IPPROTO_ICMP,
+       .name                   = "icmp",
+       .pkt_to_tuple           = icmp_pkt_to_tuple,
+       .invert_tuple           = icmp_invert_tuple,
+       .print_tuple            = icmp_print_tuple,
+       .print_conntrack        = icmp_print_conntrack,
+       .packet                 = icmp_packet,
+       .new                    = icmp_new,
+       .error                  = icmp_error,
+       .destroy                = NULL,
+       .me                     = NULL
+};
+
+EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
index 6e3480426939150b7c997dc01c41d7a1805951d1..a6026d2787d2c042a05924c33ebba66f91b6f101 100644 (file)
@@ -176,6 +176,11 @@ resubmit:
                if (ipprot->flags & INET6_PROTO_FINAL) {
                        struct ipv6hdr *hdr;    
 
+                       /* Free reference early: we don't need it any more,
+                          and it may hold ip_conntrack module loaded
+                          indefinitely. */
+                       nf_reset(skb);
+
                        skb_postpull_rcsum(skb, skb->nh.raw,
                                           skb->h.raw - skb->nh.raw);
                        hdr = skb->nh.ipv6h;
index dbd9767b32e45ee3373e94ae20afee2b5449a83c..c1fa693511a176c71f601cb3d78696ba83e25437 100644 (file)
@@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NETFILTER
        to->nfmark = from->nfmark;
        /* Connection association is same as pre-frag packet */
+       nf_conntrack_put(to->nfct);
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
        to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(to->nfct_reasm);
+       to->nfct_reasm = from->nfct_reasm;
+       nf_conntrack_get_reasm(to->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
index bb7ccfe33f2384e9bbfeadcd8d48dabba4f417b1..971ba60bf6e9ccf7dedde5b7e5486db8ef1dca6f 100644 (file)
@@ -278,5 +278,19 @@ config IP6_NF_RAW
          If you want to compile it as a module, say M here and read
          <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+config NF_CONNTRACK_IPV6
+       tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && NF_CONNTRACK
+       ---help---
+         Connection tracking keeps a record of what packets have passed
+         through your machine, in order to figure out how they are related
+         into connections.
+
+         This is IPv6 support on Layer 3 independent connection tracking.
+         Layer 3 independent connection tracking is an experimental scheme
+         which generalizes ip_conntrack to support other layer 3 protocols.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 endmenu
 
index 2b2c370e8b1ccf72d3864a19d0e5a93ab0135e6a..9ab5b2ca1f59033eb7111432013936298f24e395 100644 (file)
@@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
index 0c7584f92172c8d961a5a0f938e0557b66812793..eab8fb864ee0a3a50d7a398565f1939b19a2c158 100644 (file)
@@ -56,9 +56,9 @@ checkentry(const char *tablename,
        return 1;
 }
 
-static struct ip6t_target ip6t_mark_reg = {
-       .name           = "MARK",
-       .target         = target,
+static struct ip6t_target ip6t_mark_reg = { 
+       .name           = "MARK",
+       .target         = target,
        .checkentry     = checkentry,
        .me             = THIS_MODULE
 };
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644 (file)
index 0000000..e2c90b3
--- /dev/null
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - support Layer 3 protocol independent connection tracking.
+ *       Based on the original ip_conntrack code which had the following
+ *       copyright information:
+ *             (C) 1999-2001 Paul `Rusty' Russell
+ *             (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - add get_features() to support various size of conntrack
+ *       structures.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
+
+/* Copy the IPv6 source and destination addresses of the header at @nhoff
+ * into @tuple.  Returns 1 on success, 0 when the addresses cannot be read
+ * from @skb. */
+static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+                            struct nf_conntrack_tuple *tuple)
+{
+       /* Eight 32-bit words read starting at saddr: words 0-3 are used as
+          the source address, words 4-7 as the destination address. */
+       u_int32_t addrbuf[8];
+       const u_int32_t *addrs;
+
+       addrs = skb_header_pointer(skb,
+                                  nhoff + offsetof(struct ipv6hdr, saddr),
+                                  sizeof(addrbuf), addrbuf);
+       if (!addrs)
+               return 0;
+
+       memcpy(tuple->src.u3.ip6, &addrs[0], sizeof(tuple->src.u3.ip6));
+       memcpy(tuple->dst.u3.ip6, &addrs[4], sizeof(tuple->dst.u3.ip6));
+
+       return 1;
+}
+
+/* Build the layer 3 part of the reply tuple: @tuple gets the addresses of
+ * @orig with source and destination swapped.  Always returns 1. */
+static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+                            const struct nf_conntrack_tuple *orig)
+{
+       memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
+       memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
+
+       return 1;
+}
+
+/* Write the source and destination IPv6 addresses of @tuple to the
+ * seq_file; returns whatever seq_printf() returns. */
+static int ipv6_print_tuple(struct seq_file *s,
+                           const struct nf_conntrack_tuple *tuple)
+{
+       const struct in6_addr *src = (const struct in6_addr *)tuple->src.u3.ip6;
+       const struct in6_addr *dst = (const struct in6_addr *)tuple->dst.u3.ip6;
+
+       return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
+                         NIP6(*src), NIP6(*dst));
+}
+
+/* Layer 3 private-state printer: writes nothing for IPv6 and returns 0. */
+static int ipv6_print_conntrack(struct seq_file *s,
+                               const struct nf_conn *conntrack)
+{
+       return 0;
+}
+
+/*
+ * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
+ *
+ * This function walks the (possibly truncated) chain of extension headers
+ * that starts at offset "start" in "skb" and is "len" bytes long.
+ * "nexthdrp" initially points to the type of the first header.
+ *
+ * It skips all well-known exthdrs and returns the offset of the start
+ * of the unparsable area, i.e. the first header with unknown type.
+ * On success, *nexthdrp is updated with the type/protocol of that header.
+ *
+ * NOTES: - it may return an offset pointing beyond the end of the packet,
+ *          if the last recognized header is truncated in the middle.
+ *        - if the packet is truncated so that fewer than
+ *          sizeof(struct ipv6_opt_hdr) bytes remain where another extension
+ *          header is expected, it returns -1 (*nexthdrp is left untouched).
+ *        - if the packet is fragmented, it returns the offset of the
+ *          fragment header.
+ *        - ESP is unparsable for now and considered like
+ *          normal payload protocol.
+ *        - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ */
+
+int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp,
+                          int len)
+{
+       u8 nexthdr = *nexthdrp;
+
+       while (ipv6_ext_hdr(nexthdr)) {
+               struct ipv6_opt_hdr hdr;
+               int hdrlen;
+
+               /* The length check comes first: it also applies to
+                  NEXTHDR_NONE and NEXTHDR_FRAGMENT. */
+               if (len < (int)sizeof(struct ipv6_opt_hdr))
+                       return -1;
+               if (nexthdr == NEXTHDR_NONE)
+                       break;
+               if (nexthdr == NEXTHDR_FRAGMENT)
+                       break;
+               if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+                       BUG();
+               /* AUTH length is computed as (hdrlen + 2) * 4 bytes; all
+                  other headers use ipv6_optlen(). */
+               if (nexthdr == NEXTHDR_AUTH)
+                       hdrlen = (hdr.hdrlen+2)<<2;
+               else
+                       hdrlen = ipv6_optlen(&hdr);
+
+               nexthdr = hdr.nexthdr;
+               len -= hdrlen;
+               start += hdrlen;
+       }
+
+       *nexthdrp = nexthdr;
+       return start;
+}
+
+/* Locate the upper-layer protocol header of an IPv6 packet by skipping its
+ * extension headers.  On success stores the header offset in *dataoff and
+ * the protocol number in *protonum and returns NF_ACCEPT; otherwise bumps
+ * the error and invalid counters and returns -NF_ACCEPT. */
+static int
+ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
+            u_int8_t *protonum)
+{
+       struct sk_buff *skb = *pskb;
+       unsigned int extoff = (u8*)(skb->nh.ipv6h + 1) - skb->data;
+       unsigned char pnum = skb->nh.ipv6h->nexthdr;
+       int protoff;
+
+       protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+                                        skb->len - extoff);
+
+       /*
+        * protoff == skb->len means that the packet carries no data beyond
+        * the IPv6 and extension headers; it is tracked anyway. - YK
+        */
+       if (protoff >= 0 && protoff <= skb->len) {
+               *dataoff = protoff;
+               *protonum = pnum;
+               return NF_ACCEPT;
+       }
+
+       DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
+       NF_CT_STAT_INC(error);
+       NF_CT_STAT_INC(invalid);
+       return -NF_ACCEPT;
+}
+
+/* Feature flags for a conntrack created from @tuple: IPv6 always reports
+ * NF_CT_F_BASIC, regardless of the tuple. */
+static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
+{
+       return NF_CT_F_BASIC;
+}
+
+/*
+ * Hook function that runs the connection's helper (if any) on the packet
+ * and then confirms the connection.
+ *
+ * Returns NF_ACCEPT without confirming when the upper-layer header cannot
+ * be located or the packet is a fragment, the helper's verdict when it is
+ * not NF_ACCEPT, and otherwise the result of nf_conntrack_confirm().
+ */
+static unsigned int ipv6_confirm(unsigned int hooknum,
+                                struct sk_buff **pskb,
+                                const struct net_device *in,
+                                const struct net_device *out,
+                                int (*okfn)(struct sk_buff *))
+{
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+
+       /* This is where we call the helper: as the packet goes out. */
+       ct = nf_ct_get(*pskb, &ctinfo);
+       if (ct && ct->helper) {
+               unsigned int ret;
+               /* Signed: nf_ct_ipv6_skip_exthdr() returns -1 on a
+                  truncated header chain. */
+               int protoff;
+               unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
+                                     - (*pskb)->data;
+               unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+
+               protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+                                                (*pskb)->len - extoff);
+               if (protoff < 0 || (unsigned int)protoff > (*pskb)->len ||
+                   pnum == NEXTHDR_FRAGMENT) {
+                       DEBUGP("proto header not found\n");
+                       return NF_ACCEPT;
+               }
+
+               ret = ct->helper->help(pskb, protoff, ct, ctinfo);
+               if (ret != NF_ACCEPT)
+                       return ret;
+       }
+
+       /* We've seen it coming out the other side: confirm it */
+
+       return nf_conntrack_confirm(pskb);
+}
+
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+                              struct net_device *in,
+                              struct net_device *out,
+                              int (*okfn)(struct sk_buff *));
+/* Defragmentation hook: hands the packet to nf_ct_frag6_gather() and, when
+ * that yields a different (reassembled) skb, passes it on through
+ * nf_ct_frag6_output().  Packets that already carry conntrack state, and
+ * packets returned unchanged by the gatherer, are accepted as they are. */
+static unsigned int ipv6_defrag(unsigned int hooknum,
+                               struct sk_buff **pskb,
+                               const struct net_device *in,
+                               const struct net_device *out,
+                               int (*okfn)(struct sk_buff *))
+{
+       struct sk_buff *skb = *pskb;
+       struct sk_buff *reasm;
+
+       /* Previously seen (loopback)?  */
+       if (skb->nfct)
+               return NF_ACCEPT;
+
+       reasm = nf_ct_frag6_gather(skb);
+       if (!reasm) {
+               /* queued */
+               return NF_STOLEN;
+       }
+       if (reasm == skb) {
+               /* error occurred or not fragmented */
+               return NF_ACCEPT;
+       }
+
+       nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+                          (struct net_device *)out, okfn);
+
+       return NF_STOLEN;
+}
+
+/*
+ * Conntrack entry hook for IPv6.
+ *
+ * A packet without an attached reassembled skb (nfct_reasm) goes straight
+ * to nf_conntrack_in().  For a fragment that has one, tracking is done on
+ * the reassembled packet (once, if it has no conntrack yet) and the
+ * fragment then inherits that packet's conntrack reference together with
+ * its ctinfo.  Returns the verdict of nf_conntrack_in() or NF_ACCEPT.
+ */
+static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+                                     struct sk_buff **pskb,
+                                     const struct net_device *in,
+                                     const struct net_device *out,
+                                     int (*okfn)(struct sk_buff *))
+{
+       struct sk_buff *reasm = (*pskb)->nfct_reasm;
+
+       /* This packet is fragmented and has reassembled packet. */
+       if (reasm) {
+               /* Reassembled packet isn't parsed yet ? */
+               if (!reasm->nfct) {
+                       unsigned int ret;
+
+                       ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+                       if (ret != NF_ACCEPT)
+                               return ret;
+               }
+               nf_conntrack_get(reasm->nfct);
+               (*pskb)->nfct = reasm->nfct;
+               /* nfct and nfctinfo belong together: without this the
+                  fragment keeps whatever ctinfo it had before. */
+               (*pskb)->nfctinfo = reasm->nfctinfo;
+               return NF_ACCEPT;
+       }
+
+       return nf_conntrack_in(PF_INET6, hooknum, pskb);
+}
+
+/* LOCAL_OUT variant of ipv6_conntrack_in(): packets shorter than an IPv6
+ * header are accepted untracked (with a rate-limited message), everything
+ * else is handed to ipv6_conntrack_in(). */
+static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+                                        struct sk_buff **pskb,
+                                        const struct net_device *in,
+                                        const struct net_device *out,
+                                        int (*okfn)(struct sk_buff *))
+{
+       if ((*pskb)->len >= sizeof(struct ipv6hdr))
+               return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+
+       /* root is playing with raw sockets. */
+       if (net_ratelimit())
+               printk("ipv6_conntrack_local: packet too short\n");
+
+       return NF_ACCEPT;
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+   make it the first hook. */
+/* PRE_ROUTING: defragmentation, at NF_IP6_PRI_CONNTRACK_DEFRAG. */
+static struct nf_hook_ops ipv6_conntrack_defrag_ops = {
+       .hook           = ipv6_defrag,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_PRE_ROUTING,
+       .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
+};
+
+/* PRE_ROUTING: connection tracking of incoming packets. */
+static struct nf_hook_ops ipv6_conntrack_in_ops = {
+       .hook           = ipv6_conntrack_in,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_PRE_ROUTING,
+       .priority       = NF_IP6_PRI_CONNTRACK,
+};
+
+/* LOCAL_OUT: connection tracking of locally generated packets. */
+static struct nf_hook_ops ipv6_conntrack_local_out_ops = {
+       .hook           = ipv6_conntrack_local,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_LOCAL_OUT,
+       .priority       = NF_IP6_PRI_CONNTRACK,
+};
+
+/* LOCAL_OUT: defragmentation, at NF_IP6_PRI_CONNTRACK_DEFRAG. */
+static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = {
+       .hook           = ipv6_defrag,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_LOCAL_OUT,
+       .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
+};
+
+/* Refragmenter; last chance. */
+/* NOTE(review): the hook installed here is ipv6_confirm (helper call plus
+   confirmation); the "Refragmenter" wording looks inherited from the IPv4
+   code -- confirm and reword. */
+static struct nf_hook_ops ipv6_conntrack_out_ops = {
+       .hook           = ipv6_confirm,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_POST_ROUTING,
+       .priority       = NF_IP6_PRI_LAST,
+};
+
+/* LOCAL_IN: same confirm hook, one priority step before NF_IP6_PRI_LAST. */
+static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
+       .hook           = ipv6_confirm,
+       .owner          = THIS_MODULE,
+       .pf             = PF_INET6,
+       .hooknum        = NF_IP6_LOCAL_IN,
+       .priority       = NF_IP6_PRI_LAST-1,
+};
+
+#ifdef CONFIG_SYSCTL
+
+/* From nf_conntrack_proto_icmpv6.c */
+extern unsigned long nf_ct_icmpv6_timeout;
+
+/* From nf_conntrack_reasm.c */
+extern unsigned long nf_ct_frag6_timeout;
+extern unsigned long nf_ct_frag6_low_thresh;
+extern unsigned long nf_ct_frag6_high_thresh;
+
+static struct ctl_table_header *nf_ct_ipv6_sysctl_header;
+
+/*
+ * Tunables of the IPv6 conntrack module (child of the "netfilter" sysctl
+ * directory defined by nf_ct_netfilter_table).
+ *
+ * The two timeouts are stored in jiffies and therefore go through
+ * proc_dointvec_jiffies.  The fragment queue thresholds are not time
+ * values, so they must use plain proc_dointvec: the jiffies handler would
+ * scale every value read or written by HZ.
+ *
+ * NOTE(review): the backing variables are declared "unsigned long" while
+ * .maxlen is sizeof(unsigned int) and the handlers operate on ints --
+ * confirm the intended width and align declaration and table.
+ */
+static ctl_table nf_ct_sysctl_table[] = {
+       {
+               .ctl_name       = NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
+               .procname       = "nf_conntrack_icmpv6_timeout",
+               .data           = &nf_ct_icmpv6_timeout,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_FRAG6_TIMEOUT,
+               .procname       = "nf_conntrack_frag6_timeout",
+               .data           = &nf_ct_frag6_timeout,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
+               .procname       = "nf_conntrack_frag6_low_thresh",
+               .data           = &nf_ct_frag6_low_thresh,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
+               .procname       = "nf_conntrack_frag6_high_thresh",
+               .data           = &nf_ct_frag6_high_thresh,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       { .ctl_name = 0 }
+};
+
+/* "netfilter" directory node; its children are the tunables in
+ * nf_ct_sysctl_table. */
+static ctl_table nf_ct_netfilter_table[] = {
+       {
+               .ctl_name       = NET_NETFILTER,
+               .procname       = "netfilter",
+               .mode           = 0555,
+               .child          = nf_ct_netfilter_table == 0 ? 0 : nf_ct_sysctl_table,
+       },
+       { .ctl_name = 0 }
+};
+
+/* Root of the table handed to register_sysctl_table(): the "net" directory
+ * containing the "netfilter" node. */
+static ctl_table nf_ct_net_table[] = {
+       {
+               .ctl_name       = CTL_NET,
+               .procname       = "net",
+               .mode           = 0555,
+               .child          = nf_ct_netfilter_table,
+       },
+       { .ctl_name = 0 }
+};
+#endif
+
+/* Layer 3 tracker descriptor for IPv6: binds the handlers defined in this
+ * file to PF_INET6.  Registered and unregistered by init_or_cleanup(). */
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
+       .l3proto                = PF_INET6,
+       .name                   = "ipv6",
+       .pkt_to_tuple           = ipv6_pkt_to_tuple,
+       .invert_tuple           = ipv6_invert_tuple,
+       .print_tuple            = ipv6_print_tuple,
+       .print_conntrack        = ipv6_print_conntrack,
+       .prepare                = ipv6_prepare,
+       .get_features           = ipv6_get_features,
+       .me                     = THIS_MODULE,
+};
+
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+/*
+ * Combined setup/teardown routine for the IPv6 conntrack module.
+ *
+ * init != 0: register, in order, the fragment handling, the TCP/UDP/ICMPv6
+ *            trackers, the IPv6 layer 3 tracker, the six netfilter hooks
+ *            and (with CONFIG_SYSCTL) the sysctl table.  On the first
+ *            failure, jump into the label ladder below so that only what
+ *            was already registered is undone, in reverse order.
+ * init == 0: enter the ladder at the top ("cleanup") and undo everything.
+ *
+ * Returns 0 on success (and on teardown), otherwise the error of the step
+ * that failed (-ENOMEM for the sysctl registration).
+ */
+static int init_or_cleanup(int init)
+{
+       int ret = 0;
+
+       if (!init) goto cleanup;
+
+       ret = nf_ct_frag6_init();
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't initialize frag6.\n");
+               goto cleanup_nothing;
+       }
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register tcp.\n");
+               goto cleanup_frag6;
+       }
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register udp.\n");
+               goto cleanup_tcp;
+       }
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register icmpv6.\n");
+               goto cleanup_udp;
+       }
+
+       ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register ipv6\n");
+               goto cleanup_icmpv6;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register pre-routing defrag "
+                      "hook.\n");
+               goto cleanup_ipv6;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register local_out defrag "
+                      "hook.\n");
+               goto cleanup_defragops;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_in_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
+               goto cleanup_defraglocalops;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register local out hook.\n");
+               goto cleanup_inops;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_out_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
+               goto cleanup_inandlocalops;
+       }
+
+       ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
+       if (ret < 0) {
+               printk("nf_conntrack_ipv6: can't register local in hook.\n");
+               goto cleanup_inoutandlocalops;
+       }
+
+#ifdef CONFIG_SYSCTL
+       nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+       if (nf_ct_ipv6_sysctl_header == NULL) {
+               printk("nf_conntrack: can't register to sysctl.\n");
+               ret = -ENOMEM;
+               goto cleanup_localinops;
+       }
+#endif
+       return ret;
+
+       /* Teardown ladder: each label undoes the step registered just
+          before the one that jumps to it, then falls through to the
+          labels for all earlier steps. */
+ cleanup:
+       synchronize_net();
+#ifdef CONFIG_SYSCTL
+       unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
+       /* This label is only jumped to from CONFIG_SYSCTL code, hence its
+          place inside the #ifdef. */
+ cleanup_localinops:
+#endif
+       nf_unregister_hook(&ipv6_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+       nf_unregister_hook(&ipv6_conntrack_out_ops);
+ cleanup_inandlocalops:
+       nf_unregister_hook(&ipv6_conntrack_local_out_ops);
+ cleanup_inops:
+       nf_unregister_hook(&ipv6_conntrack_in_ops);
+ cleanup_defraglocalops:
+       nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+       nf_unregister_hook(&ipv6_conntrack_defrag_ops);
+ cleanup_ipv6:
+       nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ cleanup_icmpv6:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
+ cleanup_udp:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
+ cleanup_tcp:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
+ cleanup_frag6:
+       nf_ct_frag6_cleanup();
+ cleanup_nothing:
+       return ret;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+
+/* Module entry point (registered with module_init): run the registration
+ * half of init_or_cleanup() and return its result. */
+static int __init init(void)
+{
+       int status;
+
+       need_nf_conntrack();
+       status = init_or_cleanup(1);
+
+       return status;
+}
+
+/* Module exit point (registered with module_exit): run the teardown half of
+ * init_or_cleanup(); its return value is ignored. */
+static void __exit fini(void)
+{
+       init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+/* Intentionally empty and exported.  NOTE(review): presumably referenced by
+ * other modules only to create a symbol dependency on this one -- confirm
+ * against the callers. */
+void need_ip6_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(need_ip6_conntrack);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644 (file)
index 0000000..c0f1da5
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - ICMPv6 tracking support. Derived from the original ip_conntrack code
+ *       net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
+ *       copyright information:
+ *             (C) 1999-2001 Paul `Rusty' Russell
+ *             (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+
+unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * Fill the ICMPv6-specific members of @tuple (type, code, identifier) from
+ * the ICMPv6 header located @dataoff bytes into @skb.
+ *
+ * Returns 1 on success, 0 if the header could not be read from the skb.
+ */
+static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+                              unsigned int dataoff,
+                              struct nf_conntrack_tuple *tuple)
+{
+       struct icmp6hdr buf;
+       struct icmp6hdr *icmp6h;
+
+       icmp6h = skb_header_pointer(skb, dataoff, sizeof(buf), &buf);
+       if (!icmp6h)
+               return 0;
+
+       tuple->src.u.icmp.id = icmp6h->icmp6_identifier;
+       tuple->dst.u.icmp.type = icmp6h->icmp6_type;
+       tuple->dst.u.icmp.code = icmp6h->icmp6_code;
+       return 1;
+}
+
+/*
+ * Build in @tuple the ICMPv6 part of the tuple expected in the opposite
+ * direction of @orig: same identifier and code, with the message type
+ * replaced by its request/reply counterpart.
+ *
+ * Returns 1 on success, 0 if the type of @orig has no counterpart (any type
+ * outside the table, including every type below 128).
+ */
+static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+                              const struct nf_conntrack_tuple *orig)
+{
+       /*
+        * Indexed by (type - 128).  Each entry stores the counterpart type
+        * plus one, so the zero-filled gaps mean "no inverse".
+        * A Node Information query is answered by a reply and vice versa,
+        * exactly like the echo pair.
+        */
+       static const u_int8_t invmap[] = {
+               [ICMPV6_ECHO_REQUEST - 128]     = ICMPV6_ECHO_REPLY + 1,
+               [ICMPV6_ECHO_REPLY - 128]       = ICMPV6_ECHO_REQUEST + 1,
+               [ICMPV6_NI_QUERY - 128]         = ICMPV6_NI_REPLY + 1,
+               [ICMPV6_NI_REPLY - 128]         = ICMPV6_NI_QUERY + 1
+       };
+
+       /* Types below 128 wrap around to a large index and are rejected. */
+       __u8 type = orig->dst.u.icmp.type - 128;
+       if (type >= sizeof(invmap) || !invmap[type])
+               return 0;
+
+       tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+       tuple->dst.u.icmp.type = invmap[type] - 1;
+       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+       return 1;
+}
+
+/*
+ * Emit the ICMPv6 part of @tuple (type, code and identifier in host byte
+ * order) to @s.  The result of seq_printf() is returned unchanged.
+ */
+static int icmpv6_print_tuple(struct seq_file *s,
+                             const struct nf_conntrack_tuple *tuple)
+{
+       unsigned int type = tuple->dst.u.icmp.type;
+       unsigned int code = tuple->dst.u.icmp.code;
+       unsigned int id = ntohs(tuple->src.u.icmp.id);
+
+       return seq_printf(s, "type=%u code=%u id=%u ", type, code, id);
+}
+
+/*
+ * Print the protocol-private part of the conntrack entry.  Nothing is
+ * printed for ICMPv6: both arguments are unused and 0 is always returned.
+ */
+static int icmpv6_print_conntrack(struct seq_file *s,
+                                 const struct nf_conn *conntrack)
+{
+       /* Nothing to print. */
+       return 0;
+}
+
+/*
+ * Per-packet handler for an already tracked ICMPv6 exchange.
+ *
+ * Packets in the original direction bump the outstanding-request counter
+ * and refresh the timeout.  Packets in the reply direction decrement the
+ * counter; once it reaches zero the entry's timeout handler is invoked
+ * directly.  This function always returns NF_ACCEPT.
+ */
+static int icmpv6_packet(struct nf_conn *ct,
+                      const struct sk_buff *skb,
+                      unsigned int dataoff,
+                      enum ip_conntrack_info ctinfo,
+                      int pf,
+                      unsigned int hooknum)
+{
+       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_REPLY) {
+               atomic_inc(&ct->proto.icmp.count);
+               nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+               nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
+               return NF_ACCEPT;
+       }
+
+       /* All replies seen: try to drop the connection right away.  It will
+          not actually vanish while we still hold the skb, and del_timer()
+          guarantees the handler runs only once even if the count hits
+          zero twice (theoretically possible with SMP). */
+       if (atomic_dec_and_test(&ct->proto.icmp.count)
+           && del_timer(&ct->timeout))
+               ct->timeout.function((unsigned long)ct);
+
+       return NF_ACCEPT;
+}
+
+/*
+ * Decide whether a packet may start a new ICMPv6 "connection".
+ * Only echo requests and node information queries are accepted; for those
+ * the reply counter is reset and 1 is returned.  Every other type yields 0.
+ */
+static int icmpv6_new(struct nf_conn *conntrack,
+                     const struct sk_buff *skb,
+                     unsigned int dataoff)
+{
+       /* Indexed by (type - 128); non-zero marks a type that may open. */
+       static u_int8_t valid_new[] = {
+               [ICMPV6_ECHO_REQUEST - 128] = 1,
+               [ICMPV6_NI_QUERY - 128] = 1
+       };
+       struct nf_conntrack_tuple *t = &conntrack->tuplehash[0].tuple;
+
+       if (t->dst.u.icmp.type - 128 < sizeof(valid_new)
+           && valid_new[t->dst.u.icmp.type - 128]) {
+               atomic_set(&conntrack->proto.icmp.count, 0);
+               return 1;
+       }
+
+       /* Can't create a new ICMPv6 `conn' with this. */
+       DEBUGP("icmp: can't create new conn with type %u\n",
+              t->dst.u.icmp.type);
+       NF_CT_DUMP_TUPLE(t);
+       return 0;
+}
+
+extern int
+nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
+/*
+ * Handle an ICMPv6 error message: parse the packet embedded in its payload
+ * and, if that packet belongs to a tracked connection, attach the
+ * connection to @skb as IP_CT_RELATED (plus IP_CT_IS_REPLY when the match
+ * is on the reply direction).
+ *
+ * @skb:      packet carrying the ICMPv6 error
+ * @icmp6off: offset of the ICMPv6 header within @skb
+ * @ctinfo:   out: conntrack info written before the connection lookup
+ * @hooknum:  netfilter hook number (unused here)
+ *
+ * Always returns -NF_ACCEPT, whether or not a connection was found; the
+ * only effect of a match is that skb->nfct and skb->nfctinfo are set.
+ */
+static int
+icmpv6_error_message(struct sk_buff *skb,
+                    unsigned int icmp6off,
+                    enum ip_conntrack_info *ctinfo,
+                    unsigned int hooknum)
+{
+       struct nf_conntrack_tuple intuple, origtuple;
+       struct nf_conntrack_tuple_hash *h;
+       struct icmp6hdr _hdr, *hp;
+       unsigned int inip6off;
+       struct nf_conntrack_protocol *inproto;
+       u_int8_t inprotonum;
+       /* Signed: nf_ct_ipv6_skip_exthdr() returns int and the result is
+        * tested for a negative value below. */
+       int inprotoff;
+
+       NF_CT_ASSERT(skb->nfct == NULL);
+
+       hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
+       if (hp == NULL) {
+               DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
+               return -NF_ACCEPT;
+       }
+
+       /* The embedded IPv6 header follows the ICMPv6 header directly. */
+       inip6off = icmp6off + sizeof(_hdr);
+       if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
+                         &inprotonum, sizeof(inprotonum)) != 0) {
+               DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
+               return -NF_ACCEPT;
+       }
+       inprotoff = nf_ct_ipv6_skip_exthdr(skb,
+                                          inip6off + sizeof(struct ipv6hdr),
+                                          &inprotonum,
+                                          skb->len - inip6off
+                                                   - sizeof(struct ipv6hdr));
+
+       if ((inprotoff < 0) || ((unsigned int)inprotoff > skb->len) ||
+           (inprotonum == NEXTHDR_FRAGMENT)) {
+               DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
+               return -NF_ACCEPT;
+       }
+
+       inproto = nf_ct_find_proto(PF_INET6, inprotonum);
+
+       /* Are they talking about one of our connections? */
+       if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
+                            &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
+               DEBUGP("icmpv6_error: Can't get tuple\n");
+               return -NF_ACCEPT;
+       }
+
+       /* Ordinarily, we'd expect the inverted tupleproto, but it's
+          been preserved inside the ICMP. */
+       if (!nf_ct_invert_tuple(&intuple, &origtuple,
+                               &nf_conntrack_l3proto_ipv6, inproto)) {
+               DEBUGP("icmpv6_error: Can't invert tuple\n");
+               return -NF_ACCEPT;
+       }
+
+       *ctinfo = IP_CT_RELATED;
+
+       h = nf_conntrack_find_get(&intuple, NULL);
+       if (!h) {
+               DEBUGP("icmpv6_error: no match\n");
+               return -NF_ACCEPT;
+       }
+       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+               *ctinfo += IP_CT_IS_REPLY;
+
+       /* Update skb to refer to this connection */
+       skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+       skb->nfctinfo = *ctinfo;
+       return -NF_ACCEPT;
+}
+
+/*
+ * Pre-tracking check for ICMPv6 packets.
+ *
+ * Returns -NF_ACCEPT when the ICMPv6 header cannot be read or, on the
+ * PRE_ROUTING hook, when the checksum does not verify.  Returns NF_ACCEPT
+ * for informational messages (type >= 128).  Error messages (type < 128)
+ * are passed to icmpv6_error_message() and its result is returned.
+ *
+ * Both failure logs are emitted only when LOG_INVALID(IPPROTO_ICMPV6) is
+ * true.
+ */
+static int
+icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
+            enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+{
+       struct icmp6hdr _ih, *icmp6h;
+
+       icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+       if (icmp6h == NULL) {
+               if (LOG_INVALID(IPPROTO_ICMPV6))
+                       nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmpv6: short packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* The checksum is verified on PRE_ROUTING only; ignore the packet
+        * if it is bogus. */
+       if (hooknum == NF_IP6_PRE_ROUTING &&
+           csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+                           skb->len - dataoff, IPPROTO_ICMPV6,
+                           skb_checksum(skb, dataoff,
+                                        skb->len - dataoff, 0))) {
+               if (LOG_INVALID(IPPROTO_ICMPV6))
+                       nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmpv6: ICMPv6 checksum failed\n");
+               return -NF_ACCEPT;
+       }
+
+       /* Informational messages are not errors: let normal tracking run. */
+       if (icmp6h->icmp6_type >= 128)
+               return NF_ACCEPT;
+
+       return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
+}
+
+/* Layer-4 conntrack protocol descriptor for ICMPv6 over IPv6. */
+struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
+{
+       /* identification */
+       .name                   = "icmpv6",
+       .l3proto                = PF_INET6,
+       .proto                  = IPPROTO_ICMPV6,
+
+       /* tuple handling */
+       .pkt_to_tuple           = icmpv6_pkt_to_tuple,
+       .invert_tuple           = icmpv6_invert_tuple,
+
+       /* packet processing */
+       .error                  = icmpv6_error,
+       .new                    = icmpv6_new,
+       .packet                 = icmpv6_packet,
+
+       /* seq_file output */
+       .print_tuple            = icmpv6_print_tuple,
+       .print_conntrack        = icmpv6_print_conntrack,
+};
+
+EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644 (file)
index 0000000..7640b9b
--- /dev/null
@@ -0,0 +1,885 @@
+/*
+ * IPv6 fragment reassembly for connection tracking
+ *
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
+#define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
+#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
+
+int nf_ct_frag6_high_thresh = 256*1024;
+int nf_ct_frag6_low_thresh = 192*1024;
+int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+
+/*
+ * Per-fragment private data kept in skb->cb and accessed through
+ * NFCT_FRAG6_CB().
+ */
+struct nf_ct_frag6_skb_cb
+{
+       /* NOTE(review): presumably kept first so the generic IPv6 control
+        * block stays at the start of skb->cb -- confirm. */
+       struct inet6_skb_parm   h;
+       /* Byte offset of this fragment inside the datagram being rebuilt. */
+       int                     offset;
+       /* Associated original skb, or NULL; frag_kfree_skb() frees it
+        * together with the fragment. */
+       struct sk_buff          *orig;
+};
+
+#define NFCT_FRAG6_CB(skb)     ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+
+/*
+ * One reassembly queue: the fragments collected so far for a single
+ * (id, saddr, daddr) datagram, plus the bookkeeping needed to find, expire
+ * and evict it.
+ */
+struct nf_ct_frag6_queue
+{
+       /* Next entry in the same hash chain (see also pprev below). */
+       struct nf_ct_frag6_queue        *next;
+       struct list_head lru_list;              /* lru list member      */
+
+       /* Lookup key. */
+       __u32                   id;             /* fragment id          */
+       struct in6_addr         saddr;
+       struct in6_addr         daddr;
+
+       spinlock_t              lock;
+       /* Dropped through fq_put(); the queue is destroyed at zero. */
+       atomic_t                refcnt;
+       struct timer_list       timer;          /* expire timer         */
+       /* Queued fragments, linked through skb->next in offset order. */
+       struct sk_buff          *fragments;
+       /* Largest fragment end offset seen so far. */
+       int                     len;
+       /* Number of payload bytes currently queued. */
+       int                     meat;
+       /* Timestamp taken from the most recently queued fragment. */
+       struct timeval          stamp;
+       unsigned int            csum;
+       __u8                    last_in;        /* has first/last segment arrived? */
+#define COMPLETE               4
+#define FIRST_IN               2
+#define LAST_IN                        1
+       /* Offset of the "next header" byte, recorded from the fragment at
+        * offset 0. */
+       __u16                   nhoffset;
+       /* Address of the pointer that points at this entry in its chain. */
+       struct nf_ct_frag6_queue        **pprev;
+};
+
+/* Hash table. */
+
+#define FRAG6Q_HASHSZ  64
+
+static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
+static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
+static u32 nf_ct_frag6_hash_rnd;
+static LIST_HEAD(nf_ct_frag6_lru_list);
+int nf_ct_frag6_nqueues = 0;
+
+/*
+ * Remove @fq from its hash chain and from the LRU list and decrement the
+ * global queue count.  Takes no lock itself; fq_unlink() wraps it in the
+ * table write lock.
+ */
+static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
+{
+       struct nf_ct_frag6_queue *succ = fq->next;
+
+       if (succ != NULL)
+               succ->pprev = fq->pprev;
+       *fq->pprev = succ;
+
+       list_del(&fq->lru_list);
+       nf_ct_frag6_nqueues--;
+}
+
+/* Locked variant of __fq_unlink(): unlinks @fq while holding the table
+ * write lock. */
+static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
+{
+       write_lock(&nf_ct_frag6_lock);
+       __fq_unlink(fq);
+       write_unlock(&nf_ct_frag6_lock);
+}
+
+/*
+ * Hash a (fragment id, source address, destination address) key into a
+ * bucket index in [0, FRAG6Q_HASHSZ).  The eight address words and the id
+ * are folded in with three __jhash_mix() rounds, seeded with
+ * nf_ct_frag6_hash_rnd.
+ */
+static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+                              struct in6_addr *daddr)
+{
+       u32 a = saddr->s6_addr32[0] + JHASH_GOLDEN_RATIO;
+       u32 b = saddr->s6_addr32[1] + JHASH_GOLDEN_RATIO;
+       u32 c = saddr->s6_addr32[2] + nf_ct_frag6_hash_rnd;
+
+       __jhash_mix(a, b, c);
+
+       a += saddr->s6_addr32[3];
+       b += daddr->s6_addr32[0];
+       c += daddr->s6_addr32[1];
+       __jhash_mix(a, b, c);
+
+       a += daddr->s6_addr32[2];
+       b += daddr->s6_addr32[3];
+       c += id;
+       __jhash_mix(a, b, c);
+
+       return c & (FRAG6Q_HASHSZ - 1);
+}
+
+static struct timer_list nf_ct_frag6_secret_timer;
+int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
+
+/*
+ * Pick a fresh random hash seed and, under the table write lock, move every
+ * queue whose bucket changed onto its new chain.  Afterwards
+ * nf_ct_frag6_secret_timer is re-armed nf_ct_frag6_secret_interval jiffies
+ * after the time this function was entered.  @dummy is unused.
+ * NOTE(review): presumably installed as that timer's handler -- confirm.
+ */
+static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+{
+       unsigned long now = jiffies;
+       int i;
+
+       write_lock(&nf_ct_frag6_lock);
+       get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
+       for (i = 0; i < FRAG6Q_HASHSZ; i++) {
+               struct nf_ct_frag6_queue *q, *next;
+
+               for (q = nf_ct_frag6_hash[i]; q != NULL; q = next) {
+                       unsigned int hval;
+
+                       /* Remember the successor before q is relinked. */
+                       next = q->next;
+                       hval = ip6qhashfn(q->id, &q->saddr, &q->daddr);
+                       if (hval == i)
+                               continue;
+
+                       /* Detach q from its current chain. */
+                       if (q->next)
+                               q->next->pprev = q->pprev;
+                       *q->pprev = q->next;
+
+                       /* Push q onto the head of its new chain. */
+                       q->next = nf_ct_frag6_hash[hval];
+                       if (q->next != NULL)
+                               q->next->pprev = &q->next;
+                       nf_ct_frag6_hash[hval] = q;
+                       q->pprev = &nf_ct_frag6_hash[hval];
+               }
+       }
+       write_unlock(&nf_ct_frag6_lock);
+
+       mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+}
+
+atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+
+/* Memory Tracking Functions. */
+/*
+ * Release a queued fragment: subtract its truesize from the global memory
+ * counter, free the associated original skb if there is one, then free the
+ * fragment itself.
+ */
+static inline void frag_kfree_skb(struct sk_buff *skb)
+{
+       struct sk_buff *orig = NFCT_FRAG6_CB(skb)->orig;
+
+       atomic_sub(skb->truesize, &nf_ct_frag6_mem);
+       if (orig != NULL)
+               kfree_skb(orig);
+
+       kfree_skb(skb);
+}
+
+/* Free a queue head and subtract its size from the global memory counter. */
+static inline void frag_free_queue(struct nf_ct_frag6_queue *fq)
+{
+       atomic_sub(sizeof(*fq), &nf_ct_frag6_mem);
+       kfree(fq);
+}
+
+/*
+ * Allocate an (uninitialised) queue head with GFP_ATOMIC and add its size
+ * to the global memory counter.  Returns NULL if the allocation fails.
+ */
+static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
+{
+       struct nf_ct_frag6_queue *fq;
+
+       fq = kmalloc(sizeof(*fq), GFP_ATOMIC);
+       if (fq != NULL)
+               atomic_add(sizeof(*fq), &nf_ct_frag6_mem);
+
+       return fq;
+}
+
+/* Destruction primitives. */
+
+/*
+ * Final teardown of @fq: free every fragment still queued and then the
+ * queue head.  The queue is expected to be marked COMPLETE with no timer
+ * pending; both expectations are checked with BUG_TRAP().
+ */
+static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq)
+{
+       struct sk_buff *fp, *xp;
+
+       BUG_TRAP(fq->last_in&COMPLETE);
+       BUG_TRAP(del_timer(&fq->timer) == 0);
+
+       /* Walk the fragment chain, fetching the successor before freeing. */
+       for (fp = fq->fragments; fp != NULL; fp = xp) {
+               xp = fp->next;
+               frag_kfree_skb(fp);
+       }
+
+       frag_free_queue(fq);
+}
+
+/* Drop one reference on @fq; the queue is destroyed when it was the last. */
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+{
+       if (!atomic_dec_and_test(&fq->refcnt))
+               return;
+
+       nf_ct_frag6_destroy(fq);
+}
+
+/*
+ * Take @fq out of service without freeing it: cancel the expiry timer and,
+ * unless the queue is already COMPLETE, unlink it from the tables and mark
+ * it COMPLETE.  One reference is dropped for a cancelled pending timer and
+ * one for the unlinking.  The memory is released later, by the fq_put()
+ * that drops the last reference.
+ */
+static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+{
+       if (del_timer(&fq->timer))
+               atomic_dec(&fq->refcnt);
+
+       if (fq->last_in & COMPLETE)
+               return;
+
+       fq_unlink(fq);
+       atomic_dec(&fq->refcnt);
+       fq->last_in |= COMPLETE;
+}
+
+/*
+ * Memory-pressure evictor: while accounted fragment memory is above
+ * nf_ct_frag6_low_thresh, kill the queue at the head of the LRU list.
+ * Stops early when the LRU list is empty.
+ */
+static void nf_ct_frag6_evictor(void)
+{
+       struct nf_ct_frag6_queue *fq;
+
+       while (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_low_thresh) {
+               read_lock(&nf_ct_frag6_lock);
+               if (list_empty(&nf_ct_frag6_lru_list)) {
+                       read_unlock(&nf_ct_frag6_lock);
+                       break;
+               }
+               fq = list_entry(nf_ct_frag6_lru_list.next,
+                               struct nf_ct_frag6_queue, lru_list);
+               /* Hold a reference so fq survives once the lock is gone. */
+               atomic_inc(&fq->refcnt);
+               read_unlock(&nf_ct_frag6_lock);
+
+               spin_lock(&fq->lock);
+               if (!(fq->last_in&COMPLETE))
+                       fq_kill(fq);
+               spin_unlock(&fq->lock);
+
+               fq_put(fq);
+       }
+}
+
+/*
+ * Expiry timer handler for one queue; @data is the queue pointer.  Kills
+ * the queue unless it is already COMPLETE, then drops one reference.
+ */
+static void nf_ct_frag6_expire(unsigned long data)
+{
+       struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
+
+       spin_lock(&fq->lock);
+       if (!(fq->last_in & COMPLETE))
+               fq_kill(fq);
+       spin_unlock(&fq->lock);
+
+       fq_put(fq);
+}
+
+/* Creation primitives. */
+
+
+/*
+ * Publish the freshly created queue @fq_in in hash bucket @hash.
+ *
+ * On SMP builds the chain is searched again under the write lock; if an
+ * equal (id, saddr, daddr) entry already exists, that entry is returned
+ * with an extra reference and @fq_in is marked COMPLETE and released.
+ *
+ * Otherwise @fq_in gets its expiry timer armed, is linked at the head of
+ * the hash chain and at the tail of the LRU list, and is returned.
+ */
+static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
+                                         struct nf_ct_frag6_queue *fq_in)
+{
+       struct nf_ct_frag6_queue *fq;
+
+       write_lock(&nf_ct_frag6_lock);
+#ifdef CONFIG_SMP
+       for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+               if (fq->id == fq_in->id && 
+                   !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
+                   !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
+                       atomic_inc(&fq->refcnt);
+                       write_unlock(&nf_ct_frag6_lock);
+                       /* Mark the duplicate COMPLETE before dropping it:
+                        * nf_ct_frag6_destroy() checks that flag. */
+                       fq_in->last_in |= COMPLETE;
+                       fq_put(fq_in);
+                       return fq;
+               }
+       }
+#endif
+       fq = fq_in;
+
+       /* One reference for the timer, taken only if it was not pending. */
+       if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
+               atomic_inc(&fq->refcnt);
+
+       /* One reference for the hash table linkage. */
+       atomic_inc(&fq->refcnt);
+       if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
+               fq->next->pprev = &fq->next;
+       nf_ct_frag6_hash[hash] = fq;
+       fq->pprev = &nf_ct_frag6_hash[hash];
+       INIT_LIST_HEAD(&fq->lru_list);
+       list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
+       nf_ct_frag6_nqueues++;
+       write_unlock(&nf_ct_frag6_lock);
+       return fq;
+}
+
+
+/*
+ * Allocate and initialise a queue for the key (@id, @src, @dst) and hand it
+ * to nf_ct_frag6_intern() for insertion into bucket @hash.
+ *
+ * Returns whatever nf_ct_frag6_intern() returns, or NULL if the allocation
+ * failed.
+ */
+static struct nf_ct_frag6_queue *
+nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src,
+                  struct in6_addr *dst)
+{
+       struct nf_ct_frag6_queue *fq = frag_alloc_queue();
+
+       if (fq == NULL) {
+               DEBUGP("Can't alloc new queue\n");
+               return NULL;
+       }
+
+       memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
+
+       /* Lookup key. */
+       fq->id = id;
+       ipv6_addr_copy(&fq->saddr, src);
+       ipv6_addr_copy(&fq->daddr, dst);
+
+       /* Expiry timer; it is armed later by nf_ct_frag6_intern(). */
+       init_timer(&fq->timer);
+       fq->timer.function = nf_ct_frag6_expire;
+       fq->timer.data = (long) fq;
+
+       fq->lock = SPIN_LOCK_UNLOCKED;
+       /* Initial reference, owned by the caller. */
+       atomic_set(&fq->refcnt, 1);
+
+       return nf_ct_frag6_intern(hash, fq);
+}
+
+/*
+ * Look up the queue for (@id, @src, @dst).  A hit is returned with an extra
+ * reference taken under the table read lock; on a miss a new queue is
+ * created via nf_ct_frag6_create(), whose result (possibly NULL) is
+ * returned.
+ */
+static __inline__ struct nf_ct_frag6_queue *
+fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+       unsigned int hash = ip6qhashfn(id, src, dst);
+       struct nf_ct_frag6_queue *fq;
+
+       read_lock(&nf_ct_frag6_lock);
+       for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+               if (fq->id != id)
+                       continue;
+               if (ipv6_addr_cmp(src, &fq->saddr) ||
+                   ipv6_addr_cmp(dst, &fq->daddr))
+                       continue;
+
+               atomic_inc(&fq->refcnt);
+               read_unlock(&nf_ct_frag6_lock);
+               return fq;
+       }
+       read_unlock(&nf_ct_frag6_lock);
+
+       return nf_ct_frag6_create(hash, id, src, dst);
+}
+
+
+/*
+ * Insert fragment @skb, whose fragment header is @fhdr, into queue @fq.
+ *
+ * The fragment is validated, stripped down to its payload, trimmed where it
+ * overlaps fragments that are already queued, and linked into
+ * fq->fragments in offset order.  @nhoff is stored in fq->nhoffset when the
+ * fragment starts at offset 0.
+ *
+ * Returns 0 if the fragment was queued, -1 otherwise.
+ * NOTE(review): presumably called with fq->lock held -- confirm at the
+ * caller.
+ */
+static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 
+                            struct frag_hdr *fhdr, int nhoff)
+{
+       struct sk_buff *prev, *next;
+       int offset, end;
+
+       if (fq->last_in & COMPLETE) {
+               DEBUGP("Allready completed\n");
+               goto err;
+       }
+
+       /* [offset, end) is the byte range this fragment covers. */
+       offset = ntohs(fhdr->frag_off) & ~0x7;
+       end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
+                       ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+
+       if ((unsigned int)end > IPV6_MAXPLEN) {
+               DEBUGP("offset is too large.\n");
+               return -1;
+       }
+
+       /* Remove the headers that are about to be pulled from the
+        * hardware-computed checksum. */
+       if (skb->ip_summed == CHECKSUM_HW)
+               skb->csum = csum_sub(skb->csum,
+                                    csum_partial(skb->nh.raw,
+                                                 (u8*)(fhdr + 1) - skb->nh.raw,
+                                                 0));
+
+       /* Is this the final fragment? */
+       if (!(fhdr->frag_off & htons(IP6_MF))) {
+               /* If we already have some bits beyond end
+                * or have different end, the segment is corrupted.
+                */
+               if (end < fq->len ||
+                   ((fq->last_in & LAST_IN) && end != fq->len)) {
+                       DEBUGP("already received last fragment\n");
+                       goto err;
+               }
+               fq->last_in |= LAST_IN;
+               fq->len = end;
+       } else {
+               /* Check if the fragment is rounded to 8 bytes.
+                * Required by the RFC.
+                */
+               if (end & 0x7) {
+                       /* RFC2460 says always send parameter problem in
+                        * this case. -DaveM
+                        */
+                       DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
+                       return -1;
+               }
+               if (end > fq->len) {
+                       /* Some bits beyond end -> corruption. */
+                       if (fq->last_in & LAST_IN) {
+                               DEBUGP("last packet already reached.\n");
+                               goto err;
+                       }
+                       fq->len = end;
+               }
+       }
+
+       /* A fragment that carries no payload is rejected. */
+       if (end == offset)
+               goto err;
+
+       /* Point into the IP datagram 'data' part. */
+       if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
+               DEBUGP("queue: message is too short.\n");
+               goto err;
+       }
+       if (end-offset < skb->len) {
+               if (pskb_trim(skb, end - offset)) {
+                       DEBUGP("Can't trim\n");
+                       goto err;
+               }
+               if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+                       skb->ip_summed = CHECKSUM_NONE;
+       }
+
+       /* Find out which fragments are in front and at the back of us
+        * in the chain of fragments so far.  We must know where to put
+        * this fragment, right?
+        */
+       prev = NULL;
+       for (next = fq->fragments; next != NULL; next = next->next) {
+               if (NFCT_FRAG6_CB(next)->offset >= offset)
+                       break;  /* bingo! */
+               prev = next;
+       }
+
+       /* We found where to put this one.  Check for overlap with
+        * preceding fragment, and, if needed, align things so that
+        * any overlaps are eliminated.
+        */
+       if (prev) {
+               int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;
+
+               if (i > 0) {
+                       offset += i;
+                       if (end <= offset) {
+                               DEBUGP("overlap\n");
+                               goto err;
+                       }
+                       if (!pskb_pull(skb, i)) {
+                               DEBUGP("Can't pull\n");
+                               goto err;
+                       }
+                       if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+                               skb->ip_summed = CHECKSUM_NONE;
+               }
+       }
+
+       /* Look for overlap with succeeding segments.
+        * If we can merge fragments, do it.
+        */
+       while (next && NFCT_FRAG6_CB(next)->offset < end) {
+               /* overlap is 'i' bytes */
+               int i = end - NFCT_FRAG6_CB(next)->offset;
+
+               if (i < next->len) {
+                       /* Eat head of the next overlapped fragment
+                        * and leave the loop. The next ones cannot overlap.
+                        */
+                       DEBUGP("Eat head of the overlapped parts.: %d", i);
+                       if (!pskb_pull(next, i))
+                               goto err;
+
+                       /* next fragment */
+                       NFCT_FRAG6_CB(next)->offset += i;
+                       fq->meat -= i;
+                       if (next->ip_summed != CHECKSUM_UNNECESSARY)
+                               next->ip_summed = CHECKSUM_NONE;
+                       break;
+               } else {
+                       struct sk_buff *free_it = next;
+
+                       /* Old fragment is completely overridden by the
+                        * new one; drop it.
+                        */
+                       next = next->next;
+
+                       if (prev)
+                               prev->next = next;
+                       else
+                               fq->fragments = next;
+
+                       fq->meat -= free_it->len;
+                       frag_kfree_skb(free_it);
+               }
+       }
+
+       NFCT_FRAG6_CB(skb)->offset = offset;
+
+       /* Insert this fragment in the chain of fragments. */
+       skb->next = next;
+       if (prev)
+               prev->next = skb;
+       else
+               fq->fragments = skb;
+
+       /* Account for the newly queued fragment. */
+       skb->dev = NULL;
+       skb_get_timestamp(skb, &fq->stamp);
+       fq->meat += skb->len;
+       atomic_add(skb->truesize, &nf_ct_frag6_mem);
+
+       /* The first fragment.
+        * nhoffset is obtained from the first fragment, of course.
+        */
+       if (offset == 0) {
+               fq->nhoffset = nhoff;
+               fq->last_in |= FIRST_IN;
+       }
+       /* Move the queue to the tail of the LRU list. */
+       write_lock(&nf_ct_frag6_lock);
+       list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
+       write_unlock(&nf_ct_frag6_lock);
+       return 0;
+
+err:
+       return -1;
+}
+
+/*
+ *     Build the reassembled packet out of the fragments queued on @fq.
+ *
+ *     Returns the head skb of the reassembled datagram, with the other
+ *     fragments hanging off its frag_list and @dev set as its device,
+ *     or NULL on failure (oversized payload or out of memory).
+ *
+ *     The queue is killed with fq_kill() before anything else, so it is
+ *     out of service afterwards even when NULL is returned.
+ *
+ *     It is called with locked fq, and caller must check that
+ *     queue is eligible for reassembly i.e. it is not COMPLETE,
+ *     the last and the first frames arrived and all the bits are here.
+ */
+static struct sk_buff *
+nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+{
+       struct sk_buff *fp, *op, *head = fq->fragments;
+       int    payload_len;
+
+       fq_kill(fq);
+
+       BUG_TRAP(head != NULL);
+       BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
+
+       /* Unfragmented part is taken from the first segment. */
+       payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+       if (payload_len > IPV6_MAXPLEN) {
+               DEBUGP("payload len is too large.\n");
+               goto out_oversize;
+       }
+
+       /* Head of list must not be cloned. */
+       if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+               DEBUGP("skb is cloned but can't expand head");
+               goto out_oom;
+       }
+
+       /* If the first fragment is fragmented itself, we split
+        * it to two chunks: the first with data and paged part
+        * and the second, holding only fragments. */
+       if (skb_shinfo(head)->frag_list) {
+               struct sk_buff *clone;
+               int i, plen = 0;
+
+               if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
+                       DEBUGP("Can't alloc skb\n");
+                       goto out_oom;
+               }
+               clone->next = head->next;
+               head->next = clone;
+               skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+               skb_shinfo(head)->frag_list = NULL;
+               for (i=0; i<skb_shinfo(head)->nr_frags; i++)
+                       plen += skb_shinfo(head)->frags[i].size;
+               clone->len = clone->data_len = head->data_len - plen;
+               head->data_len -= clone->len;
+               head->len -= clone->len;
+               clone->csum = 0;
+               clone->ip_summed = head->ip_summed;
+
+               /* The split-off skb has no original; the chaining loop
+                * at the end relies on this to recognise and skip it. */
+               NFCT_FRAG6_CB(clone)->orig = NULL;
+               atomic_add(clone->truesize, &nf_ct_frag6_mem);
+       }
+
+       /* We have to remove fragment header from datagram and to relocate
+        * header in order to calculate ICV correctly. */
+       head->nh.raw[fq->nhoffset] = head->h.raw[0];
+       memmove(head->head + sizeof(struct frag_hdr), head->head,
+               (head->data - head->head) - sizeof(struct frag_hdr));
+       head->mac.raw += sizeof(struct frag_hdr);
+       head->nh.raw += sizeof(struct frag_hdr);
+
+       skb_shinfo(head)->frag_list = head->next;
+       head->h.raw = head->data;
+       skb_push(head, head->data - head->nh.raw);
+       atomic_sub(head->truesize, &nf_ct_frag6_mem);
+
+       /* Fold the length, checksum and memory accounting of every other
+        * fragment into the head. */
+       for (fp=head->next; fp; fp = fp->next) {
+               head->data_len += fp->len;
+               head->len += fp->len;
+               if (head->ip_summed != fp->ip_summed)
+                       head->ip_summed = CHECKSUM_NONE;
+               else if (head->ip_summed == CHECKSUM_HW)
+                       head->csum = csum_add(head->csum, fp->csum);
+               head->truesize += fp->truesize;
+               atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+       }
+
+       head->next = NULL;
+       head->dev = dev;
+       skb_set_timestamp(head, &fq->stamp);
+       head->nh.ipv6h->payload_len = htons(payload_len);
+
+       /* Yes, and fold redundant checksum back. 8) */
+       if (head->ip_summed == CHECKSUM_HW)
+               head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+
+       fq->fragments = NULL;
+
+       /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+       fp = skb_shinfo(head)->frag_list;
+       /* frag_list is NULL when the whole datagram arrived as one
+        * fragment, so fp must be checked before it is dereferenced. */
+       if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
+               /* at above code, head skb is divided into two skbs. */
+               fp = fp->next;
+
+       op = NFCT_FRAG6_CB(head)->orig;
+       for (; fp; fp = fp->next) {
+               struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
+
+               op->next = orig;
+               op = orig;
+               NFCT_FRAG6_CB(fp)->orig = NULL;
+       }
+
+       return head;
+
+out_oversize:
+       if (net_ratelimit())
+               printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
+       goto out_fail;
+out_oom:
+       if (net_ratelimit())
+               printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
+out_fail:
+       return NULL;
+}
+
+/*
+ * Find the header just before the Fragment Header.
+ *
+ * Walks the extension header chain that follows the fixed IPv6 header
+ * until a header of type NEXTHDR_FRAGMENT is reached.
+ *
+ * On success returns 0 and sets ...
+ * (*prevhdrp): the type of the header just before the Fragment Header
+ *             (NEXTHDR_IPV6 when no extension header precedes it).
+ * (*prevhoff): the offset of "Next Header Field" in the header
+ *             just before Fragment Header.
+ * (*fhoff)   : the offset of Fragment Header.
+ * All offsets are relative to skb->data.
+ *
+ * Returns -1 when a non-extension header or NEXTHDR_NONE is met before
+ * any Fragment Header, or when the packet is too short for the headers
+ * it announces.
+ *
+ * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
+ *
+ */
+static int
+find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+{
+       u8 nexthdr = skb->nh.ipv6h->nexthdr;
+       /* Kept as int: it is later assigned from "start" and exported
+        * through an int pointer, and a chain of extension headers can
+        * push the offset past 255, which a u8 would silently truncate. */
+       int prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
+       int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+       int len = skb->len - start;
+       u8 prevhdr = NEXTHDR_IPV6;
+
+       while (nexthdr != NEXTHDR_FRAGMENT) {
+               struct ipv6_opt_hdr hdr;
+               int hdrlen;
+
+               if (!ipv6_ext_hdr(nexthdr)) {
+                       return -1;
+               }
+               if (len < (int)sizeof(struct ipv6_opt_hdr)) {
+                       DEBUGP("too short\n");
+                       return -1;
+               }
+               if (nexthdr == NEXTHDR_NONE) {
+                       DEBUGP("next header is none\n");
+                       return -1;
+               }
+               /* The length check above guarantees the copy stays inside
+                * the packet, so a failure here is treated as a bug. */
+               if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+                       BUG();
+               /* AUTH headers use their own length encoding. */
+               if (nexthdr == NEXTHDR_AUTH)
+                       hdrlen = (hdr.hdrlen+2)<<2;
+               else
+                       hdrlen = ipv6_optlen(&hdr);
+
+               prevhdr = nexthdr;
+               prev_nhoff = start;
+
+               nexthdr = hdr.nexthdr;
+               len -= hdrlen;
+               start += hdrlen;
+       }
+
+       /* The last header skipped claimed more bytes than the packet has. */
+       if (len < 0)
+               return -1;
+
+       *prevhdrp = prevhdr;
+       *prevhoff = prev_nhoff;
+       *fhoff = start;
+
+       return 0;
+}
+
+/*
+ * Feed one IPv6 packet into the conntrack fragment reassembly queues.
+ *
+ * Return value, as far as this function shows:
+ *  - the original skb when the packet is left alone: zero payload length,
+ *    no Fragment Header found, clone/pull failure, the frag_off test
+ *    below says it is not a fragment, no queue available, or queueing
+ *    the clone failed;
+ *  - NULL when the clone was queued but no reassembled packet is
+ *    available (queue not complete yet, or nf_ct_frag6_reasm() failed);
+ *  - the skb returned by nf_ct_frag6_reasm() once the queue is complete.
+ *
+ * The original skb is never queued itself; a clone is, and the clone
+ * remembers the original through NFCT_FRAG6_CB(clone)->orig.
+ */
+struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
+{
+       struct sk_buff *clone; 
+       struct net_device *dev = skb->dev;
+       struct frag_hdr *fhdr;
+       struct nf_ct_frag6_queue *fq;
+       struct ipv6hdr *hdr;
+       int fhoff, nhoff;
+       u8 prevhdr;
+       struct sk_buff *ret_skb = NULL;
+
+       /* Jumbo payload inhibits frag. header */
+       if (skb->nh.ipv6h->payload_len == 0) {
+               DEBUGP("payload len = 0\n");
+               return skb;
+       }
+
+       /* No Fragment Header in the chain: nothing to reassemble. */
+       if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
+               return skb;
+
+       clone = skb_clone(skb, GFP_ATOMIC);
+       if (clone == NULL) {
+               DEBUGP("Can't clone skb\n");
+               return skb;
+       }
+
+       NFCT_FRAG6_CB(clone)->orig = skb;
+
+       /* Make sure the whole Fragment Header can be read via clone->data. */
+       if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
+               DEBUGP("message is too short.\n");
+               goto ret_orig;
+       }
+
+       clone->h.raw = clone->data + fhoff;
+       hdr = clone->nh.ipv6h;
+       fhdr = (struct frag_hdr *)clone->h.raw;
+
+       /* All bits selected by the 0xFFF9 mask are clear in frag_off. */
+       if (!(fhdr->frag_off & htons(0xFFF9))) {
+               DEBUGP("Invalid fragment offset\n");
+               /* It is not a fragmented frame */
+               goto ret_orig;
+       }
+
+       /* Make room first if the reassembly memory accounting is over
+        * the high threshold. */
+       if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+               nf_ct_frag6_evictor();
+
+       fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
+       if (fq == NULL) {
+               DEBUGP("Can't find and can't create new queue\n");
+               goto ret_orig;
+       }
+
+       spin_lock(&fq->lock);
+
+       if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+               spin_unlock(&fq->lock);
+               DEBUGP("Can't insert skb to queue\n");
+               fq_put(fq);
+               goto ret_orig;
+       }
+
+       /* Both first and last fragment seen and every byte accounted for. */
+       if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+               ret_skb = nf_ct_frag6_reasm(fq, dev);
+               if (ret_skb == NULL)
+                       DEBUGP("Can't reassemble fragmented packets\n");
+       }
+       spin_unlock(&fq->lock);
+
+       fq_put(fq);
+       return ret_skb;
+
+ret_orig:
+       /* The clone was not queued: free it and hand the original back. */
+       kfree_skb(clone);
+       return skb;
+}
+
+/*
+ * Pass every original fragment recorded behind a reassembled packet on
+ * to the hooks that follow NF_IP6_PRI_CONNTRACK_DEFRAG.
+ *
+ * skb is the reassembled packet; its NFCT_FRAG6_CB(skb)->orig chain
+ * holds the fragments.  Each fragment gets its nfct_reasm pointer
+ * switched to skb (taking a reference on skb and dropping the one on
+ * whatever it pointed to before).  The reference on skb held by the
+ * caller is dropped at the end.
+ */
+void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+                       struct net_device *in, struct net_device *out,
+                       int (*okfn)(struct sk_buff *))
+{
+       struct sk_buff *frag = NFCT_FRAG6_CB(skb)->orig;
+
+       while (frag != NULL) {
+               /* Remember the successor before the fragment is handed to
+                * the hook chain; frag is not touched afterwards. */
+               struct sk_buff *following = frag->next;
+
+               nf_conntrack_put_reasm(frag->nfct_reasm);
+               nf_conntrack_get_reasm(skb);
+               frag->nfct_reasm = skb;
+
+               NF_HOOK_THRESH(PF_INET6, hooknum, frag, in, out, okfn,
+                              NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+
+               frag = following;
+       }
+
+       nf_conntrack_put_reasm(skb);
+}
+
+/*
+ * Free a reassembled packet together with all original fragments
+ * chained on NFCT_FRAG6_CB(skb)->orig.  Always returns 0.
+ */
+int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
+{
+       struct sk_buff *cur = NFCT_FRAG6_CB(skb)->orig;
+
+       while (cur) {
+               /* Read the link before the buffer is released. */
+               struct sk_buff *nxt = cur->next;
+
+               kfree_skb(cur);
+               cur = nxt;
+       }
+
+       kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * Seed the fragment hash from the amount of physical memory and the
+ * current jiffies value, then arm the timer that runs
+ * nf_ct_frag6_secret_rebuild() after nf_ct_frag6_secret_interval.
+ * Always returns 0.
+ */
+int nf_ct_frag6_init(void)
+{
+       unsigned long page_mix = num_physpages ^ (num_physpages>>7);
+       unsigned long tick_mix = jiffies ^ (jiffies >> 6);
+
+       nf_ct_frag6_hash_rnd = (u32) (page_mix ^ tick_mix);
+
+       init_timer(&nf_ct_frag6_secret_timer);
+       nf_ct_frag6_secret_timer.expires = jiffies
+                                          + nf_ct_frag6_secret_interval;
+       nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+       add_timer(&nf_ct_frag6_secret_timer);
+
+       return 0;
+}
+
+/*
+ * Tear-down counterpart of nf_ct_frag6_init(): stop the secret rebuild
+ * timer and run the evictor once.
+ *
+ * NOTE(review): del_timer() does not wait for a handler that is already
+ * running, and whether one evictor pass releases every queue depends on
+ * nf_ct_frag6_evictor(), which is not visible here -- confirm both.
+ */
+void nf_ct_frag6_cleanup(void)
+{
+       del_timer(&nf_ct_frag6_secret_timer);
+       nf_ct_frag6_evictor();
+}
index a1265a320b1170a43e1db063d573e20944582ca2..651c79b41eeb1500241532dda6cedc700e71ebc3 100644 (file)
@@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
                        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
                        /* Not releasing hash table! */
-                       if (clone)
+                       if (clone) {
+                               nf_reset(clone);
                                rawv6_rcv(sk, clone);
+                       }
                }
                sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
                                     IP6CB(skb)->iif);
index 227e99ed510cdabe8f776a0e22e4bbce0b5a1201..f7f42c3e96cb537f5cd885610d00972916e6e591 100644 (file)
@@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
 static int fib6_dump_done(struct netlink_callback *cb)
 {
        fib6_dump_end(cb);
-       return cb->done(cb);
+       return cb->done ? cb->done(cb) : 0;
 }
 
 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
index 8296b38bf2701fdd81aa275a18fcbc1b26a7d787..a84f9221e5f02d4e5d41640f010d0472346dd85f 100644 (file)
@@ -1,3 +1,6 @@
+menu "Core Netfilter Configuration"
+       depends on NET && NETFILTER
+
 config NETFILTER_NETLINK
        tristate "Netfilter netlink interface"
        help
@@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG
          and is also scheduled to replace the old syslog-based ipt_LOG
          and ip6t_LOG modules.
 
+config NF_CONNTRACK
+       tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && IP_NF_CONNTRACK=n
+       default n
+       ---help---
+         Connection tracking keeps a record of what packets have passed
+         through your machine, in order to figure out how they are related
+         into connections.
+
+         Layer 3 independent connection tracking is an experimental scheme
+         which generalizes ip_conntrack to support other layer 3 protocols.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
+config NF_CT_ACCT
+       bool "Connection tracking flow accounting"
+       depends on NF_CONNTRACK
+       help
+         If this option is enabled, the connection tracking code will
+         keep per-flow packet and byte counters.
+
+         Those counters can be used for flow-based accounting or the
+         `connbytes' match.
+
+         If unsure, say `N'.
+
+config NF_CONNTRACK_MARK
+       bool  'Connection mark tracking support'
+       depends on NF_CONNTRACK
+       help
+         This option enables support for connection marks, used by the
+         `CONNMARK' target and `connmark' match. Similar to the mark value
+         of packets, but this mark value is kept in the conntrack session
+         instead of the individual packets.
+
+config NF_CONNTRACK_EVENTS
+       bool "Connection tracking events"
+       depends on NF_CONNTRACK
+       help
+         If this option is enabled, the connection tracking code will
+         provide a notifier chain that can be used by other kernel code
+         to get notified about changes in the connection tracking state.
+
+         If unsure, say `N'.
+
+config NF_CT_PROTO_SCTP
+       tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)'
+       depends on EXPERIMENTAL && NF_CONNTRACK
+       default n
+       help
+         With this option enabled, the layer 3 independent connection
+         tracking code will be able to do state tracking on SCTP connections.
+
+         If you want to compile it as a module, say M here and read
+         Documentation/modules.txt.  If unsure, say `N'.
+
+config NF_CONNTRACK_FTP
+       tristate "FTP support on new connection tracking (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && NF_CONNTRACK
+       help
+         Tracking FTP connections is problematic: special helpers are
+         required for tracking them, and doing masquerading and other forms
+         of Network Address Translation on them.
+
+         This is FTP support on Layer 3 independent connection tracking.
+         Layer 3 independent connection tracking is an experimental scheme
+         which generalizes ip_conntrack to support other layer 3 protocols.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
+endmenu
index b3b44f8b415a6f1ea495a82d137833d3b7f9ee6f..55f019ad2c086acffbd9de3fdee90ef93295592f 100644 (file)
@@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
 obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
+
+nf_conntrack-objs      := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
+
+obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
+obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
+
+# SCTP protocol connection tracking
+obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
new file mode 100644 (file)
index 0000000..9a67c79
--- /dev/null
@@ -0,0 +1,1538 @@
+/* Connection state tracking for netfilter.  This is separated from,
+   but required by, the NAT layer; it can also be used by an iptables
+   extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ *     - new API and handling of conntrack/nat helpers
+ *     - now capable of multiple expectations for one master
+ * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
+ *     - add usage/reference counts to ip_conntrack_expect
+ *     - export ip_conntrack[_expect]_{find_get,put} functions
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - add support for various sizes of conntrack structures.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_core.c
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+
+/* This rwlock protects the main hash table, protocol/helper/expected
+   registrations, conntrack timers*/
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#define NF_CONNTRACK_VERSION   "0.4.1"
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DEFINE_RWLOCK(nf_conntrack_lock);
+
+/* nf_conntrack_standalone needs this */
+atomic_t nf_conntrack_count = ATOMIC_INIT(0);
+
+void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
+LIST_HEAD(nf_conntrack_expect_list);
+struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
+struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
+static LIST_HEAD(helpers);
+unsigned int nf_conntrack_htable_size = 0;
+int nf_conntrack_max;
+struct list_head *nf_conntrack_hash;
+static kmem_cache_t *nf_conntrack_expect_cachep;
+struct nf_conn nf_conntrack_untracked;
+unsigned int nf_ct_log_invalid;
+static LIST_HEAD(unconfirmed);
+static int nf_conntrack_vmalloc;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+struct notifier_block *nf_conntrack_chain;
+struct notifier_block *nf_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+
+/* Deliver the events cached in *ecache (if any are pending and the
+ * conntrack is confirmed and not dying), then reset the cache entry and
+ * drop its conntrack reference - must be called with locally disabled
+ * softirqs */
+static inline void
+__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+{
+       DEBUGP("ecache: delivering events for %p\n", ecache->ct);
+
+       if (!nf_ct_is_confirmed(ecache->ct))
+               goto reset;
+       if (nf_ct_is_dying(ecache->ct))
+               goto reset;
+       if (ecache->events)
+               notifier_call_chain(&nf_conntrack_chain, ecache->events,
+                                   ecache->ct);
+
+reset:
+       ecache->events = 0;
+       nf_ct_put(ecache->ct);
+       ecache->ct = NULL;
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling for freeing the skb.
+ *
+ * Only the current CPU's cache entry is looked at, and it is delivered
+ * only if it currently refers to ct.  Bottom halves are disabled around
+ * the access, as the delivery helper requires. */
+void nf_ct_deliver_cached_events(const struct nf_conn *ct)
+{
+       struct nf_conntrack_ecache *ecache;
+
+       local_bh_disable();
+       ecache = &__get_cpu_var(nf_conntrack_ecache);
+       if (ecache->ct == ct)
+               __nf_ct_deliver_cached_events(ecache);
+       local_bh_enable();
+}
+
+/* Deliver cached events for old pending events, if current conntrack != old.
+ *
+ * Afterwards the current CPU's cache entry points at ct and holds a
+ * reference on it.  Calling this while the entry already refers to ct
+ * is treated as a bug (BUG_ON).  No bottom-half protection is taken
+ * here; presumably the caller provides it -- verify against callers. */
+void __nf_ct_event_cache_init(struct nf_conn *ct)
+{
+       struct nf_conntrack_ecache *ecache;
+       
+       /* take care of delivering potentially old events */
+       ecache = &__get_cpu_var(nf_conntrack_ecache);
+       BUG_ON(ecache->ct == ct);
+       if (ecache->ct)
+               __nf_ct_deliver_cached_events(ecache);
+       /* initialize for this conntrack/packet */
+       ecache->ct = ct;
+       nf_conntrack_get(&ct->ct_general);
+}
+
+/* flush the event cache - touches other CPU's data and must not be called
+ * while packets are still passing through the code.
+ *
+ * Pending events are discarded, not delivered.  Each entry is reset
+ * after its conntrack reference is dropped, so that no stale pointer or
+ * leftover event mask stays behind in the per-CPU cache. */
+static void nf_ct_event_cache_flush(void)
+{
+       struct nf_conntrack_ecache *ecache;
+       int cpu;
+
+       for_each_cpu(cpu) {
+               ecache = &per_cpu(nf_conntrack_ecache, cpu);
+               if (ecache->ct) {
+                       nf_ct_put(ecache->ct);
+                       /* Do not keep a pointer we no longer hold a
+                        * reference for. */
+                       ecache->ct = NULL;
+               }
+               /* Events cached for the dropped conntrack must not be
+                * attributed to whatever is cached next. */
+               ecache->events = 0;
+       }
+}
+#else
+static inline void nf_ct_event_cache_flush(void) {}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
+
+/*
+ * This scheme offers various size of "struct nf_conn" dependent on
+ * features(helper, nat, ...)
+ */
+
+/* Size of the buffer allocated for a slab cache name; longer names are
+ * rejected by nf_conntrack_register_cache(). */
+#define NF_CT_FEATURES_NAMELEN 256
+/* One entry per feature combination, indexed by the "features" value
+ * passed to nf_conntrack_register_cache(). */
+static struct {
+       /* name of slab cache. printed in /proc/slabinfo */
+       char *name;
+
+       /* size of slab cache */
+       size_t size;
+
+       /* slab cache pointer */
+       kmem_cache_t *cachep;
+
+       /* use count: set to 1 when the slab cache is created and
+        * incremented for every further registration that reuses it */
+       int use;
+
+       /* Initialization */
+       int (*init_conntrack)(struct nf_conn *, u_int32_t);
+
+} nf_ct_cache[NF_CT_F_NUM];
+
+/* protect members of nf_ct_cache except of "use" */
+DEFINE_RWLOCK(nf_ct_cache_lock);
+
+/* This avoids calling kmem_cache_create() with same name simultaneously */
+DECLARE_MUTEX(nf_ct_cache_mutex);
+
+extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
+/* Look up the layer 4 protocol handler registered for the given
+ * layer 3 protocol family / protocol number pair.  Falls back to the
+ * generic protocol handler when the family has no protocol table. */
+struct nf_conntrack_protocol *
+nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
+{
+       struct nf_conntrack_protocol **l4_table = nf_ct_protos[l3proto];
+
+       if (unlikely(!l4_table))
+               return &nf_conntrack_generic_protocol;
+
+       return l4_table[protocol];
+}
+
+static int nf_conntrack_hash_rnd_initted;
+static unsigned int nf_conntrack_hash_rnd;
+
+/* Hash a tuple into the range [0, size).
+ *
+ * The source and destination layer 3 addresses are hashed separately,
+ * seeded with the family/protocol numbers and the layer 4 identifiers
+ * respectively, and the two results are combined with rnd. */
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+                                 unsigned int size, unsigned int rnd)
+{
+       unsigned int src_hash;
+       unsigned int dst_hash;
+
+       src_hash = jhash((void *)tuple->src.u3.all,
+                        sizeof(tuple->src.u3.all),
+                        ((tuple->src.l3num) << 16) | tuple->dst.protonum);
+
+       dst_hash = jhash((void *)tuple->dst.u3.all,
+                        sizeof(tuple->dst.u3.all),
+                        (tuple->src.u.all << 16) | tuple->dst.u.all);
+
+       return jhash_2words(src_hash, dst_hash, rnd) % size;
+}
+
+/* Hash a tuple into a bucket index of the main conntrack hash table,
+ * using the current table size and the global random seed. */
+static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
+{
+       return __hash_conntrack(tuple, nf_conntrack_htable_size,
+                               nf_conntrack_hash_rnd);
+}
+
+/* Initialize "struct nf_conn" which has spaces for helper: point
+ * conntrack->help at the first address inside conntrack->data that is
+ * suitably aligned for union nf_conntrack_help.  "features" is not
+ * used.  Always returns 0. */
+static int
+init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features)
+{
+       const unsigned long align_mask =
+               __alignof__(union nf_conntrack_help) - 1;
+       unsigned long addr = (unsigned long)conntrack->data;
+
+       /* Round up to the next multiple of the alignment. */
+       addr = (addr + align_mask) & ~align_mask;
+       conntrack->help = (union nf_conntrack_help *)addr;
+
+       return 0;
+}
+
+/*
+ * Register (or share) the slab cache used for conntracks with the given
+ * feature set.
+ *
+ * features: index into nf_ct_cache[]; must lie in
+ *           [NF_CT_F_BASIC, NF_CT_F_NUM).
+ * name:     slab cache name; must be shorter than NF_CT_FEATURES_NAMELEN.
+ * size:     object size of the slab cache.
+ * init:     per-conntrack initialization callback stored with the cache.
+ *
+ * Returns 0 on success (new cache created, or an existing registration
+ * with identical name, size and init reused), -EINVAL for an invalid
+ * feature value or a too long name, -EBUSY if the slot is in use with
+ * different parameters, -ENOMEM on allocation failure.
+ *
+ * nf_ct_cache_mutex is held for the whole operation; nf_ct_cache_lock
+ * is only taken around accesses to the nf_ct_cache[] entry and is
+ * released before the slab cache is created.
+ */
+int nf_conntrack_register_cache(u_int32_t features, const char *name,
+                               size_t size,
+                               int (*init)(struct nf_conn *, u_int32_t))
+{
+       int ret = 0;
+       char *cache_name;
+       kmem_cache_t *cachep;
+
+       DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
+              features, name, size);
+
+       if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
+               DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
+                       features);
+               return -EINVAL;
+       }
+
+       down(&nf_ct_cache_mutex);
+
+       write_lock_bh(&nf_ct_cache_lock);
+       /* e.g: multiple helpers are loaded */
+       if (nf_ct_cache[features].use > 0) {
+               DEBUGP("nf_conntrack_register_cache: already resisterd.\n");
+               /* Sharing is only allowed when every parameter matches. */
+               if ((!strncmp(nf_ct_cache[features].name, name,
+                             NF_CT_FEATURES_NAMELEN))
+                   && nf_ct_cache[features].size == size
+                   && nf_ct_cache[features].init_conntrack == init) {
+                       DEBUGP("nf_conntrack_register_cache: reusing.\n");
+                       nf_ct_cache[features].use++;
+                       ret = 0;
+               } else
+                       ret = -EBUSY;
+
+               write_unlock_bh(&nf_ct_cache_lock);
+               up(&nf_ct_cache_mutex);
+               return ret;
+       }
+       write_unlock_bh(&nf_ct_cache_lock);
+
+       /*
+        * The memory space for name of slab cache must be alive until
+        * cache is destroyed.
+        */
+       cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
+       if (cache_name == NULL) {
+               DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
+               ret = -ENOMEM;
+               goto out_up_mutex;
+       }
+
+       /* strlcpy() returns the source length: >= buffer size means the
+        * name was truncated. */
+       if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
+                                               >= NF_CT_FEATURES_NAMELEN) {
+               printk("nf_conntrack_register_cache: name too long\n");
+               ret = -EINVAL;
+               goto out_free_name;
+       }
+
+       cachep = kmem_cache_create(cache_name, size, 0, 0,
+                                  NULL, NULL);
+       if (!cachep) {
+               printk("nf_conntrack_register_cache: Can't create slab cache "
+                      "for the features = 0x%x\n", features);
+               ret = -ENOMEM;
+               goto out_free_name;
+       }
+
+       /* Publish the fully set up entry. */
+       write_lock_bh(&nf_ct_cache_lock);
+       nf_ct_cache[features].use = 1;
+       nf_ct_cache[features].size = size;
+       nf_ct_cache[features].init_conntrack = init;
+       nf_ct_cache[features].cachep = cachep;
+       nf_ct_cache[features].name = cache_name;
+       write_unlock_bh(&nf_ct_cache_lock);
+
+       /* Success: cache_name is now owned by the table entry. */
+       goto out_up_mutex;
+
+out_free_name:
+       kfree(cache_name);
+out_up_mutex:
+       up(&nf_ct_cache_mutex);
+       return ret;
+}
+
+/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
+/*
+ * Drop one use of the slab cache registered for "features".  When the
+ * use count reaches zero the table entry is cleared and, after
+ * synchronize_net(), the slab cache is destroyed and its name freed.
+ *
+ * NOTE(review): "features" is not range-checked here, unlike in
+ * nf_conntrack_register_cache() -- callers must pass a registered value.
+ */
+void nf_conntrack_unregister_cache(u_int32_t features)
+{
+       kmem_cache_t *cachep;
+       char *name;
+
+       /*
+        * This assures that kmem_cache_create() isn't called before destroying
+        * slab cache.
+        */
+       DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
+       down(&nf_ct_cache_mutex);
+
+       write_lock_bh(&nf_ct_cache_lock);
+       /* Other users remain: only the count changes. */
+       if (--nf_ct_cache[features].use > 0) {
+               write_unlock_bh(&nf_ct_cache_lock);
+               up(&nf_ct_cache_mutex);
+               return;
+       }
+       /* Detach cache and name from the table while holding the lock;
+        * they are released below, outside of it. */
+       cachep = nf_ct_cache[features].cachep;
+       name = nf_ct_cache[features].name;
+       nf_ct_cache[features].cachep = NULL;
+       nf_ct_cache[features].name = NULL;
+       nf_ct_cache[features].init_conntrack = NULL;
+       nf_ct_cache[features].size = 0;
+       write_unlock_bh(&nf_ct_cache_lock);
+
+       synchronize_net();
+
+       kmem_cache_destroy(cachep);
+       kfree(name);
+
+       up(&nf_ct_cache_mutex);
+}
+
+/*
+ * Build a conntrack tuple for the packet in skb.
+ *
+ * The tuple is blanked, then filled by the layer 3 handler (header at
+ * nhoff) and by the layer 4 handler (header at dataoff).  The direction
+ * is set to IP_CT_DIR_ORIGINAL.
+ *
+ * Returns 0 if the layer 3 handler returns 0; otherwise returns whatever
+ * the layer 4 handler returns.
+ */
+int
+nf_ct_get_tuple(const struct sk_buff *skb,
+               unsigned int nhoff,
+               unsigned int dataoff,
+               u_int16_t l3num,
+               u_int8_t protonum,
+               struct nf_conntrack_tuple *tuple,
+               const struct nf_conntrack_l3proto *l3proto,
+               const struct nf_conntrack_protocol *protocol)
+{
+       NF_CT_TUPLE_U_BLANK(tuple);
+       tuple->src.l3num = l3num;
+
+       if (!l3proto->pkt_to_tuple(skb, nhoff, tuple))
+               return 0;
+
+       tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+       tuple->dst.protonum = protonum;
+
+       return protocol->pkt_to_tuple(skb, dataoff, tuple);
+}
+
+/*
+ * Build in *inverse the tuple for the opposite direction of *orig.
+ *
+ * The layer 3 family and protocol number are copied, the direction is
+ * flipped, and the layer 3 / layer 4 handlers invert their own parts.
+ *
+ * Returns 0 if the layer 3 handler returns 0; otherwise returns whatever
+ * the layer 4 handler returns.
+ */
+int
+nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
+                  const struct nf_conntrack_tuple *orig,
+                  const struct nf_conntrack_l3proto *l3proto,
+                  const struct nf_conntrack_protocol *protocol)
+{
+       NF_CT_TUPLE_U_BLANK(inverse);
+       inverse->src.l3num = orig->src.l3num;
+
+       if (!l3proto->invert_tuple(inverse, orig))
+               return 0;
+
+       inverse->dst.protonum = orig->dst.protonum;
+       inverse->dst.dir = !orig->dst.dir;
+
+       return protocol->invert_tuple(inverse, orig);
+}
+
+/* nf_conntrack_expect helper functions */
+/* Take an expectation off the global expectation list: update the
+ * delete statistic, decrement the master's count of pending
+ * expectations and drop one reference on the expectation.
+ *
+ * As the assertions state, the caller holds nf_conntrack_lock for
+ * writing and has already stopped the expectation's timer. */
+static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
+{
+       ASSERT_WRITE_LOCK(&nf_conntrack_lock);
+       /* Check this expectation's own timer; the previous spelling named
+        * an undeclared identifier and only built while NF_CT_ASSERT()
+        * expanded to nothing. */
+       NF_CT_ASSERT(!timer_pending(&exp->timeout));
+       list_del(&exp->list);
+       NF_CT_STAT_INC(expect_delete);
+       exp->master->expecting--;
+       nf_conntrack_expect_put(exp);
+}
+
+/* Timer callback for an expectation; ul_expect is the expectation
+ * pointer cast to unsigned long.  Unlinks the expectation under the
+ * write lock and then drops a further reference.
+ * NOTE(review): presumably the second put releases the reference owned
+ * by the timer -- confirm where that reference is taken. */
+static void expectation_timed_out(unsigned long ul_expect)
+{
+       struct nf_conntrack_expect *exp = (void *)ul_expect;
+
+       write_lock_bh(&nf_conntrack_lock);
+       nf_ct_unlink_expect(exp);
+       write_unlock_bh(&nf_conntrack_lock);
+       nf_conntrack_expect_put(exp);
+}
+
+/* If an expectation for this connection is found, it gets delete from
+ * global list then returned.
+ *
+ * Exception: an expectation flagged NF_CT_EXPECT_PERMANENT stays on the
+ * list; it is returned with its use count incremented instead.
+ * Returns NULL if nothing matches.  A matching non-permanent
+ * expectation whose timer could not be deleted is skipped.
+ *
+ * The list is modified without taking a lock here, so the caller
+ * presumably holds nf_conntrack_lock for writing -- verify. */
+static struct nf_conntrack_expect *
+find_expectation(const struct nf_conntrack_tuple *tuple)
+{
+       struct nf_conntrack_expect *i;
+
+       list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+       /* If master is not in hash table yet (ie. packet hasn't left
+          this machine yet), how can other end know about expected?
+          Hence these are not the droids you are looking for (if
+          master ct never got confirmed, we'd hold a reference to it
+          and weird things would happen to future packets). */
+               if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
+                   && nf_ct_is_confirmed(i->master)) {
+                       if (i->flags & NF_CT_EXPECT_PERMANENT) {
+                               atomic_inc(&i->use);
+                               return i;
+                       } else if (del_timer(&i->timeout)) {
+                               /* Timer stopped before it fired: we own
+                                * the expectation now. */
+                               nf_ct_unlink_expect(i);
+                               return i;
+                       }
+               }
+       }
+       return NULL;
+}
+
+/* delete all expectations for this conntrack
+ *
+ * Only expectations whose timer could still be deleted are removed
+ * here; the others are left alone (presumably their timer handler is
+ * already on its way to unlink them -- verify).  Both call sites in
+ * this file hold nf_conntrack_lock for writing. */
+static void remove_expectations(struct nf_conn *ct)
+{
+       struct nf_conntrack_expect *i, *tmp;
+
+       /* Optimization: most connection never expect any others. */
+       if (ct->expecting == 0)
+               return;
+
+       /* _safe variant: entries are unlinked while iterating. */
+       list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
+               if (i->master == ct && del_timer(&i->timeout)) {
+                       nf_ct_unlink_expect(i);
+                       nf_conntrack_expect_put(i);
+               }
+       }
+}
+
+/* Remove both tuple hash entries (original and reply direction) of ct
+ * from the main hash table and delete its pending expectations.
+ * Expects nf_conntrack_lock to be held for writing, per the assertion. */
+static void
+clean_from_lists(struct nf_conn *ct)
+{
+       unsigned int ho, hr;
+       
+       DEBUGP("clean_from_lists(%p)\n", ct);
+       ASSERT_WRITE_LOCK(&nf_conntrack_lock);
+
+       /* Bucket indexes for the original and the reply direction. */
+       ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+       hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+       LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+       LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+
+       /* Destroy all pending expectations */
+       remove_expectations(ct);
+}
+
+/*
+ * Final destruction of a conntrack; nfct is cast to the enclosing
+ * struct nf_conn.  The assertions expect the use count to be zero and
+ * the timeout timer to be stopped.
+ *
+ * Order of operations: emit the IPCT_DESTROY event, mark the conntrack
+ * dying, run the layer 3 / layer 4 / global destroy callbacks, then
+ * (under the write lock) remove remaining expectations and unlink an
+ * unconfirmed conntrack from its list, and finally drop the master
+ * reference and free the conntrack.
+ */
+static void
+destroy_conntrack(struct nf_conntrack *nfct)
+{
+       struct nf_conn *ct = (struct nf_conn *)nfct;
+       struct nf_conntrack_l3proto *l3proto;
+       struct nf_conntrack_protocol *proto;
+
+       DEBUGP("destroy_conntrack(%p)\n", ct);
+       NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+       NF_CT_ASSERT(!timer_pending(&ct->timeout));
+
+       nf_conntrack_event(IPCT_DESTROY, ct);
+       set_bit(IPS_DYING_BIT, &ct->status);
+
+       /* To make sure we don't get any weird locking issues here:
+        * destroy_conntrack() MUST NOT be called with a write lock
+        * to nf_conntrack_lock!!! -HW */
+       l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
+       if (l3proto && l3proto->destroy)
+               l3proto->destroy(ct);
+
+       proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
+                                ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+       if (proto && proto->destroy)
+               proto->destroy(ct);
+
+       /* Optional global hook, called only when it is set. */
+       if (nf_conntrack_destroyed)
+               nf_conntrack_destroyed(ct);
+
+       write_lock_bh(&nf_conntrack_lock);
+       /* Expectations will have been removed in clean_from_lists,
+        * except TFTP can create an expectation on the first packet,
+        * before connection is in the list, so we need to clean here,
+        * too. */
+       remove_expectations(ct);
+
+       /* We overload first tuple to link into unconfirmed list. */
+       if (!nf_ct_is_confirmed(ct)) {
+               BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
+               list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+       }
+
+       NF_CT_STAT_INC(delete);
+       write_unlock_bh(&nf_conntrack_lock);
+
+       /* Release the reference held on the master conntrack, if any. */
+       if (ct->master)
+               nf_ct_put(ct->master);
+
+       DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+       nf_conntrack_free(ct);
+}
+
+/* Timer-style callback; ul_conntrack is the conntrack pointer cast to
+ * unsigned long.  Takes the conntrack out of the hash table and the
+ * expectation list under the write lock, then drops one reference.
+ * NOTE(review): presumably that is the reference owned by the hash
+ * table/timer, so this is normally what leads to destroy_conntrack() --
+ * confirm against where the timer is set up. */
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+       struct nf_conn *ct = (void *)ul_conntrack;
+
+       write_lock_bh(&nf_conntrack_lock);
+       /* Inside lock so preempt is disabled on module removal path.
+        * Otherwise we can get spurious warnings. */
+       NF_CT_STAT_INC(delete_list);
+       clean_from_lists(ct);
+       write_unlock_bh(&nf_conntrack_lock);
+       nf_ct_put(ct);
+}
+
+/* Match predicate for hash chain lookups: returns 1 if hash entry i
+ * carries exactly "tuple" and does not belong to ignored_conntrack,
+ * 0 otherwise. */
+static inline int
+conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
+                   const struct nf_conntrack_tuple *tuple,
+                   const struct nf_conn *ignored_conntrack)
+{
+       ASSERT_READ_LOCK(&nf_conntrack_lock);
+
+       /* Entries of the conntrack to ignore never match. */
+       if (nf_ct_tuplehash_to_ctrack(i) == ignored_conntrack)
+               return 0;
+
+       return nf_ct_tuple_equal(tuple, &i->tuple) ? 1 : 0;
+}
+
+/* Look up "tuple" in the main hash table, skipping entries that belong
+ * to ignored_conntrack.  Returns the matching hash entry or NULL.  No
+ * reference is taken on the conntrack; per the assertion the caller is
+ * expected to hold nf_conntrack_lock at least for reading. */
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
+                   const struct nf_conn *ignored_conntrack)
+{
+       struct nf_conntrack_tuple_hash *h;
+       unsigned int hash = hash_conntrack(tuple);
+
+       ASSERT_READ_LOCK(&nf_conntrack_lock);
+       list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
+               if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+                       NF_CT_STAT_INC(found);
+                       return h;
+               }
+               /* Counts every chain entry inspected without a match. */
+               NF_CT_STAT_INC(searched);
+       }
+
+       return NULL;
+}
+
+/* Find a connection corresponding to a tuple.
+ *
+ * Locked wrapper around __nf_conntrack_find(): on a match the use count
+ * of the owning conntrack is incremented before the read lock is
+ * released, so the caller owns a reference it must drop again.
+ * Returns NULL when no entry matches. */
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
+                     const struct nf_conn *ignored_conntrack)
+{
+       struct nf_conntrack_tuple_hash *h;
+
+       read_lock_bh(&nf_conntrack_lock);
+       h = __nf_conntrack_find(tuple, ignored_conntrack);
+       if (h)
+               atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+       read_unlock_bh(&nf_conntrack_lock);
+
+       return h;
+}
+
+/* Confirm a connection given skb; places it in hash table.
+ *
+ * Returns NF_ACCEPT when the conntrack was inserted, or when the packet
+ * is not in the original direction (nothing is done in that case).
+ * Returns NF_DROP when an entry with the same original or reply tuple
+ * is already present in the hash table. */
+int
+__nf_conntrack_confirm(struct sk_buff **pskb)
+{
+       unsigned int hash, repl_hash;
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+
+       ct = nf_ct_get(*pskb, &ctinfo);
+
+       /* ipt_REJECT uses nf_conntrack_attach to attach related
+          ICMP/TCP RST packets in other direction.  Actual packet
+          which created connection will be IP_CT_NEW or for an
+          expected connection, IP_CT_RELATED. */
+       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+               return NF_ACCEPT;
+
+       hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+       repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+       /* We're not in hash table, and we refuse to set up related
+          connections for unconfirmed conns.  But packet copies and
+          REJECT will give spurious warnings here. */
+       /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
+
+       /* No external references means no one else could have
+          confirmed us. */
+       NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+       DEBUGP("Confirming conntrack %p\n", ct);
+
+       write_lock_bh(&nf_conntrack_lock);
+
+       /* See if there's one in the list already, including reverse:
+          NAT could have grabbed it without realizing, since we're
+          not in the hash.  If there is, we lost race. */
+       if (!LIST_FIND(&nf_conntrack_hash[hash],
+                      conntrack_tuple_cmp,
+                      struct nf_conntrack_tuple_hash *,
+                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+           && !LIST_FIND(&nf_conntrack_hash[repl_hash],
+                         conntrack_tuple_cmp,
+                         struct nf_conntrack_tuple_hash *,
+                         &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+               /* Remove from unconfirmed list */
+               list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+               list_prepend(&nf_conntrack_hash[hash],
+                            &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+               list_prepend(&nf_conntrack_hash[repl_hash],
+                            &ct->tuplehash[IP_CT_DIR_REPLY]);
+               /* Timer relative to confirmation time, not original
+                  setting time, otherwise we'd get timer wrap in
+                  weird delay cases. */
+               ct->timeout.expires += jiffies;
+               add_timer(&ct->timeout);
+               atomic_inc(&ct->ct_general.use);
+               set_bit(IPS_CONFIRMED_BIT, &ct->status);
+               NF_CT_STAT_INC(insert);
+               write_unlock_bh(&nf_conntrack_lock);
+               if (ct->helper)
+                       nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_NF_NAT_NEEDED
+               if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+                   test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+                       nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+               nf_conntrack_event_cache(master_ct(ct) ?
+                                        IPCT_RELATED : IPCT_NEW, *pskb);
+               return NF_ACCEPT;
+       }
+
+       NF_CT_STAT_INC(insert_failed);
+       write_unlock_bh(&nf_conntrack_lock);
+       return NF_DROP;
+}
+
+/* Returns true if a connection corresponding to the tuple already
+   exists in the hash table (required for NAT). */
+int
+nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
+                        const struct nf_conn *ignored_conntrack)
+{
+       int taken;
+
+       read_lock_bh(&nf_conntrack_lock);
+       taken = (__nf_conntrack_find(tuple, ignored_conntrack) != NULL);
+       read_unlock_bh(&nf_conntrack_lock);
+
+       return taken;
+}
+
+/* List predicate: true when the owning conntrack does not have
+   IPS_ASSURED_BIT set.  There's a small race here where we may free a
+   just-assured connection.  Too bad: we're in trouble anyway. */
+static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
+{
+       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
+
+       return !test_bit(IPS_ASSURED_BIT, &ct->status);
+}
+
+/* Try to evict one entry accepted by unreplied() from @chain.
+ * Returns 1 if a conntrack was killed, 0 otherwise. */
+static int early_drop(struct list_head *chain)
+{
+       struct nf_conntrack_tuple_hash *victim;
+       struct nf_conn *ct = NULL;
+
+       read_lock_bh(&nf_conntrack_lock);
+       /* Traverse backwards: gives us oldest, which is roughly LRU */
+       victim = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
+       if (victim != NULL) {
+               /* Pin the entry so it survives once the lock is dropped. */
+               ct = nf_ct_tuplehash_to_ctrack(victim);
+               atomic_inc(&ct->ct_general.use);
+       }
+       read_unlock_bh(&nf_conntrack_lock);
+
+       if (ct == NULL)
+               return 0;
+
+       /* Only kill it ourselves if we stopped its timer first. */
+       if (!del_timer(&ct->timeout)) {
+               nf_ct_put(ct);
+               return 0;
+       }
+
+       death_by_timeout((unsigned long)ct);
+       NF_CT_STAT_INC(early_drop);
+       nf_ct_put(ct);
+       return 1;
+}
+
+/* List predicate: compares @rtuple against helper @i's tuple under the
+ * helper's mask, returning whatever nf_ct_tuple_mask_cmp() returns. */
+static inline int helper_cmp(const struct nf_conntrack_helper *i,
+                            const struct nf_conntrack_tuple *rtuple)
+{
+       const struct nf_conntrack_tuple *want = &i->tuple;
+       const struct nf_conntrack_tuple *mask = &i->mask;
+
+       return nf_ct_tuple_mask_cmp(rtuple, want, mask);
+}
+
+/* Return the first registered helper accepted by helper_cmp() for
+ * @tuple, or NULL if none matches. */
+static struct nf_conntrack_helper *
+nf_ct_find_helper(const struct nf_conntrack_tuple *tuple)
+{
+       struct nf_conntrack_helper *match;
+
+       match = LIST_FIND(&helpers, helper_cmp,
+                         struct nf_conntrack_helper *,
+                         tuple);
+       return match;
+}
+
+/* Allocate and minimally initialise a conntrack for the @orig/@repl
+ * tuple pair, using the slab cache that matches its feature mask.
+ *
+ * Returns:
+ *  - the new conntrack, with use count 1 and its timeout timer
+ *    initialised but not armed;
+ *  - ERR_PTR(-ENOMEM) when the table is at nf_conntrack_max and
+ *    early_drop() could not evict anything;
+ *  - NULL when no cache is in use for the feature mask, the slab
+ *    allocation fails, or the cache's init_conntrack hook fails. */
+static struct nf_conn *
+__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+                    const struct nf_conntrack_tuple *repl,
+                    const struct nf_conntrack_l3proto *l3proto)
+{
+       struct nf_conn *conntrack = NULL;
+       u_int32_t features = 0;
+
+       /* Seed the hash lazily, on the first allocation. */
+       if (!nf_conntrack_hash_rnd_initted) {
+               get_random_bytes(&nf_conntrack_hash_rnd, 4);
+               nf_conntrack_hash_rnd_initted = 1;
+       }
+
+       if (nf_conntrack_max
+           && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
+               unsigned int hash = hash_conntrack(orig);
+               /* Try dropping from this hash chain. */
+               if (!early_drop(&nf_conntrack_hash[hash])) {
+                       if (net_ratelimit())
+                               printk(KERN_WARNING
+                                      "nf_conntrack: table full, dropping"
+                                      " packet.\n");
+                       return ERR_PTR(-ENOMEM);
+               }
+       }
+
+       /*  find features needed by this conntrack. */
+       features = l3proto->get_features(orig);
+       read_lock_bh(&nf_conntrack_lock);
+       if (nf_ct_find_helper(repl) != NULL)
+               features |= NF_CT_F_HELP;
+       read_unlock_bh(&nf_conntrack_lock);
+
+       DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
+
+       read_lock_bh(&nf_ct_cache_lock);
+
+       if (!nf_ct_cache[features].use) {
+               DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
+                       features);
+               goto out;
+       }
+
+       conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
+       if (conntrack == NULL) {
+               DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
+               goto out;
+       }
+
+       memset(conntrack, 0, nf_ct_cache[features].size);
+       conntrack->features = features;
+       if (nf_ct_cache[features].init_conntrack &&
+           nf_ct_cache[features].init_conntrack(conntrack, features) < 0) {
+               DEBUGP("nf_conntrack_alloc: failed to init\n");
+               kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
+               conntrack = NULL;
+               goto out;
+       }
+
+       atomic_set(&conntrack->ct_general.use, 1);
+       conntrack->ct_general.destroy = destroy_conntrack;
+       conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+       conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
+       /* Don't set timer yet: wait for confirmation */
+       init_timer(&conntrack->timeout);
+       conntrack->timeout.data = (unsigned long)conntrack;
+       conntrack->timeout.function = death_by_timeout;
+
+       atomic_inc(&nf_conntrack_count);
+out:
+       read_unlock_bh(&nf_ct_cache_lock);
+       return conntrack;
+}
+
+/* Public allocator: looks up the layer 3 protocol from the original
+ * tuple's l3num and defers to __nf_conntrack_alloc(), whose return
+ * value is passed through unchanged. */
+struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+                                  const struct nf_conntrack_tuple *repl)
+{
+       return __nf_conntrack_alloc(orig, repl,
+                                   nf_ct_find_l3proto(orig->src.l3num));
+}
+
+/* Hand @conntrack back to the slab cache selected by its feature mask
+ * and decrement the global conntrack count. */
+void nf_conntrack_free(struct nf_conn *conntrack)
+{
+       const u_int32_t feat = conntrack->features;
+
+       NF_CT_ASSERT(feat >= NF_CT_F_BASIC && feat < NF_CT_F_NUM);
+       DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", feat,
+              conntrack);
+
+       kmem_cache_free(nf_ct_cache[feat].cachep, conntrack);
+       atomic_dec(&nf_conntrack_count);
+}
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+   failed due to stress.  Otherwise it really is unclassifiable.
+
+   On success the new conntrack is placed on the unconfirmed list and
+   its ORIGINAL-direction tuple hash is returned.  NULL is returned if
+   the tuple cannot be inverted, if __nf_conntrack_alloc() returns NULL
+   or if protocol->new() rejects the packet; an ERR_PTR value from
+   __nf_conntrack_alloc() is passed through to the caller. */
+static struct nf_conntrack_tuple_hash *
+init_conntrack(const struct nf_conntrack_tuple *tuple,
+              struct nf_conntrack_l3proto *l3proto,
+              struct nf_conntrack_protocol *protocol,
+              struct sk_buff *skb,
+              unsigned int dataoff)
+{
+       struct nf_conn *conntrack;
+       struct nf_conntrack_tuple repl_tuple;
+       struct nf_conntrack_expect *exp;
+
+       if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
+               DEBUGP("Can't invert tuple.\n");
+               return NULL;
+       }
+
+       conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
+       if (conntrack == NULL || IS_ERR(conntrack)) {
+               DEBUGP("Can't allocate conntrack.\n");
+               return (struct nf_conntrack_tuple_hash *)conntrack;
+       }
+
+       if (!protocol->new(conntrack, skb, dataoff)) {
+               nf_conntrack_free(conntrack);
+               DEBUGP("init conntrack: can't track with proto module\n");
+               return NULL;
+       }
+
+       write_lock_bh(&nf_conntrack_lock);
+       exp = find_expectation(tuple);
+
+       if (exp) {
+               DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
+                       conntrack, exp);
+               /* Welcome, Mr. Bond.  We've been expecting you... */
+               __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
+               conntrack->master = exp->master;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+               conntrack->mark = exp->master->mark;
+#endif
+               nf_conntrack_get(&conntrack->master->ct_general);
+               NF_CT_STAT_INC(expect_new);
+       } else {
+               conntrack->helper = nf_ct_find_helper(&repl_tuple);
+
+               NF_CT_STAT_INC(new);
+        }
+
+       /* Overload tuple linked list to put us in unconfirmed list. */
+       list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+
+       write_unlock_bh(&nf_conntrack_lock);
+
+       /* The expectation callback runs after the lock has been dropped. */
+       if (exp) {
+               if (exp->expectfn)
+                       exp->expectfn(conntrack, exp);
+               nf_conntrack_expect_put(exp);
+       }
+
+       return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+}
+
+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo.
+ *
+ * Returns NULL when no tuple can be extracted from the packet or when
+ * init_conntrack() returns NULL; an ERR_PTR value from init_conntrack()
+ * is passed through.  *set_reply is set to 1 for a packet matching the
+ * reply-direction tuple and to 0 otherwise. */
+static inline struct nf_conn *
+resolve_normal_ct(struct sk_buff *skb,
+                 unsigned int dataoff,
+                 u_int16_t l3num,
+                 u_int8_t protonum,
+                 struct nf_conntrack_l3proto *l3proto,
+                 struct nf_conntrack_protocol *proto,
+                 int *set_reply,
+                 enum ip_conntrack_info *ctinfo)
+{
+       struct nf_conntrack_tuple tuple;
+       struct nf_conntrack_tuple_hash *h;
+       struct nf_conn *ct;
+
+       if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
+                            dataoff, l3num, protonum, &tuple, l3proto,
+                            proto)) {
+               DEBUGP("resolve_normal_ct: Can't get tuple\n");
+               return NULL;
+       }
+
+       /* look for tuple match */
+       h = nf_conntrack_find_get(&tuple, NULL);
+       if (!h) {
+               h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
+               if (!h)
+                       return NULL;
+               if (IS_ERR(h))
+                       return (void *)h;
+       }
+       ct = nf_ct_tuplehash_to_ctrack(h);
+
+       /* It exists; we have (non-exclusive) reference. */
+       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
+               *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
+               /* Please set reply bit if this packet OK */
+               *set_reply = 1;
+       } else {
+               /* Once we've had two way comms, always ESTABLISHED. */
+               if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+                       DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
+                       *ctinfo = IP_CT_ESTABLISHED;
+               } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
+                       DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
+                       *ctinfo = IP_CT_RELATED;
+               } else {
+                       DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
+                       *ctinfo = IP_CT_NEW;
+               }
+               *set_reply = 0;
+       }
+       skb->nfct = &ct->ct_general;
+       skb->nfctinfo = *ctinfo;
+       return ct;
+}
+
+/* Track the packet *pskb of protocol family @pf seen at hook @hooknum.
+ *
+ * Returns a netfilter verdict:
+ *  - NF_ACCEPT if the skb already carries a conntrack, or if it is not
+ *    a valid part of any connection;
+ *  - NF_DROP if resolve_normal_ct() returned an ERR_PTR value;
+ *  - the negated return code of l3proto->prepare(), proto->error() or
+ *    proto->packet() when that callback refuses the packet;
+ *  - otherwise the value returned by proto->packet(). */
+unsigned int
+nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
+{
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conntrack_l3proto *l3proto;
+       struct nf_conntrack_protocol *proto;
+       unsigned int dataoff;
+       u_int8_t protonum;
+       int set_reply = 0;
+       int ret;
+
+       /* Previously seen (loopback or untracked)?  Ignore. */
+       if ((*pskb)->nfct) {
+               NF_CT_STAT_INC(ignore);
+               return NF_ACCEPT;
+       }
+
+       l3proto = nf_ct_find_l3proto((u_int16_t)pf);
+       if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
+               DEBUGP("not prepared to track yet or error occured\n");
+               return -ret;
+       }
+
+       proto = nf_ct_find_proto((u_int16_t)pf, protonum);
+
+       /* It may be a special packet, error, unclean...
+        * inverse of the return code tells to the netfilter
+        * core what to do with the packet. */
+       if (proto->error != NULL &&
+           (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+               NF_CT_STAT_INC(error);
+               NF_CT_STAT_INC(invalid);
+               return -ret;
+       }
+
+       ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
+                              &set_reply, &ctinfo);
+       if (!ct) {
+               /* Not valid part of a connection */
+               NF_CT_STAT_INC(invalid);
+               return NF_ACCEPT;
+       }
+
+       if (IS_ERR(ct)) {
+               /* Too stressed to deal. */
+               NF_CT_STAT_INC(drop);
+               return NF_DROP;
+       }
+
+       NF_CT_ASSERT((*pskb)->nfct);
+
+       ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+       if (ret < 0) {
+               /* Invalid: inverse of the return code tells
+                * the netfilter core what to do */
+               DEBUGP("nf_conntrack_in: Can't track with proto module\n");
+               nf_conntrack_put((*pskb)->nfct);
+               (*pskb)->nfct = NULL;
+               NF_CT_STAT_INC(invalid);
+               return -ret;
+       }
+
+       /* Report a status event only on the first reply-direction packet. */
+       if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+               nf_conntrack_event_cache(IPCT_STATUS, *pskb);
+
+       return ret;
+}
+
+/* Invert @orig into @inverse, looking up the layer 3 and layer 4
+ * protocol handlers from the tuple's own l3num/protonum fields.
+ * Returns whatever nf_ct_invert_tuple() returns. */
+int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
+                        const struct nf_conntrack_tuple *orig)
+{
+       struct nf_conntrack_l3proto *l3proto;
+       struct nf_conntrack_protocol *proto;
+
+       l3proto = nf_ct_find_l3proto(orig->src.l3num);
+       proto = nf_ct_find_proto(orig->src.l3num, orig->dst.protonum);
+
+       return nf_ct_invert_tuple(inverse, orig, l3proto, proto);
+}
+
+/* Would two expected things clash? */
+static inline int expect_clash(const struct nf_conntrack_expect *a,
+                              const struct nf_conntrack_expect *b)
+{
+       /* Only the bits set in both masks are compared: the part
+          covered by that intersection must be unequal, otherwise
+          the two expectations clash. */
+       struct nf_conntrack_tuple both;
+       int n;
+
+       for (n = 0; n < NF_CT_TUPLE_L3SIZE; n++) {
+               both.src.u3.all[n] =
+                       a->mask.src.u3.all[n] & b->mask.src.u3.all[n];
+               both.dst.u3.all[n] =
+                       a->mask.dst.u3.all[n] & b->mask.dst.u3.all[n];
+       }
+
+       both.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
+       both.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
+       both.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
+       both.dst.protonum = a->mask.dst.protonum & b->mask.dst.protonum;
+
+       return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &both);
+}
+
+/* 1 when @a and @b have the same master, tuple and mask; 0 otherwise. */
+static inline int expect_matches(const struct nf_conntrack_expect *a,
+                                const struct nf_conntrack_expect *b)
+{
+       if (a->master != b->master)
+               return 0;
+       if (!nf_ct_tuple_equal(&a->tuple, &b->tuple))
+               return 0;
+
+       return nf_ct_tuple_equal(&a->mask, &b->mask) != 0;
+}
+
+/* Generally a bad idea to call this: could have matched already. */
+void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
+{
+       struct nf_conntrack_expect *i;
+       struct nf_conntrack_expect *victim = NULL;
+
+       write_lock_bh(&nf_conntrack_lock);
+       /* choose the oldest expectation to evict */
+       list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+               if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+                       nf_ct_unlink_expect(i);
+                       victim = i;
+                       break;
+               }
+       }
+       write_unlock_bh(&nf_conntrack_lock);
+
+       /* The put happens only after the lock has been released. */
+       if (victim != NULL)
+               nf_conntrack_expect_put(victim);
+}
+
+/* Allocate an expectation owned by @me with a use count of 1; only the
+ * master and use fields are set here.  Returns NULL on allocation
+ * failure.
+ *
+ * We don't increase the master conntrack refcount for non-fulfilled
+ * conntracks. During the conntrack destruction, the expectations are
+ * always killed before the conntrack itself */
+struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
+{
+       struct nf_conntrack_expect *exp;
+
+       exp = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
+       if (exp != NULL) {
+               exp->master = me;
+               atomic_set(&exp->use, 1);
+               return exp;
+       }
+
+       DEBUGP("expect_related: OOM allocating expect\n");
+       return NULL;
+}
+
+/* Drop one reference to @exp; the expectation goes back to its slab
+ * cache when the use count reaches zero. */
+void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+{
+       if (!atomic_dec_and_test(&exp->use))
+               return;
+
+       kmem_cache_free(nf_conntrack_expect_cachep, exp);
+}
+
+/* Link @exp into the global expectation list and arm its timeout,
+ * which is taken from the master's helper (in seconds).  The use count
+ * is bumped twice -- presumably once for the list and once for the
+ * timer -- and the master's expecting counter once.
+ * NOTE(review): the one caller visible here holds nf_conntrack_lock for
+ * writing; confirm for any others. */
+static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
+{
+       struct nf_conn *master = exp->master;
+
+       atomic_inc(&exp->use);
+       master->expecting++;
+       list_add(&exp->list, &nf_conntrack_expect_list);
+
+       init_timer(&exp->timeout);
+       exp->timeout.data = (unsigned long)exp;
+       exp->timeout.function = expectation_timed_out;
+       exp->timeout.expires = jiffies + master->helper->timeout * HZ;
+       add_timer(&exp->timeout);
+
+       atomic_inc(&exp->use);
+       NF_CT_STAT_INC(expect_create);
+}
+
+/* Evict the oldest expectation owned by @master, if its timer can
+ * still be stopped.  Race with expectations being used means we could
+ * have none to find; OK. */
+static void evict_oldest_expect(struct nf_conn *master)
+{
+       struct nf_conntrack_expect *i;
+
+       list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+               if (i->master != master)
+                       continue;
+
+               /* Only the first match from the tail is considered. */
+               if (del_timer(&i->timeout)) {
+                       nf_ct_unlink_expect(i);
+                       nf_conntrack_expect_put(i);
+               }
+               return;
+       }
+}
+
+/* Restart the timeout of expectation @i using its master helper's
+ * timeout.  Returns 1 on success, 0 if the timer could not be stopped
+ * (the expectation is already on its way out). */
+static inline int refresh_timer(struct nf_conntrack_expect *i)
+{
+       if (del_timer(&i->timeout)) {
+               i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
+               add_timer(&i->timeout);
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Register @expect on the global expectation list.
+ *
+ * Returns 0 when the expectation was inserted, or when an identical one
+ * (same master, tuple and mask) already exists and its timer could be
+ * refreshed; in the latter case @expect itself is not inserted.
+ * Returns -EBUSY when @expect clashes with a different existing
+ * expectation.  Takes nf_conntrack_lock for writing internally. */
+int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
+{
+       struct nf_conntrack_expect *i;
+       int ret;
+
+       /* Print the master conntrack: no "related_to" exists in this
+        * function, so naming it broke the build with DEBUGP enabled. */
+       DEBUGP("nf_conntrack_expect_related %p\n", expect->master);
+       DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
+       DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);
+
+       write_lock_bh(&nf_conntrack_lock);
+       list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+               if (expect_matches(i, expect)) {
+                       /* Refresh timer: if it's dying, ignore.. */
+                       if (refresh_timer(i)) {
+                               ret = 0;
+                               goto out;
+                       }
+               } else if (expect_clash(i, expect)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
+       /* Will be over limit? */
+       if (expect->master->helper->max_expected &&
+           expect->master->expecting >= expect->master->helper->max_expected)
+               evict_oldest_expect(expect->master);
+
+       nf_conntrack_expect_insert(expect);
+       nf_conntrack_expect_event(IPEXP_NEW, expect);
+       ret = 0;
+out:
+       write_unlock_bh(&nf_conntrack_lock);
+       return ret;
+}
+
+/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
+   implicitly racy: see __nf_conntrack_confirm */
+void nf_conntrack_alter_reply(struct nf_conn *conntrack,
+                             const struct nf_conntrack_tuple *newreply)
+{
+       struct nf_conntrack_tuple *reply;
+
+       write_lock_bh(&nf_conntrack_lock);
+       /* Should be unconfirmed, so not in hash table yet */
+       NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack));
+
+       DEBUGP("Altering reply tuple of %p to ", conntrack);
+       NF_CT_DUMP_TUPLE(newreply);
+
+       reply = &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple;
+       *reply = *newreply;
+
+       /* The helper is looked up again only for a conntrack that has no
+          master and no outstanding expectations. */
+       if (conntrack->master == NULL && conntrack->expecting == 0)
+               conntrack->helper = nf_ct_find_helper(newreply);
+       write_unlock_bh(&nf_conntrack_lock);
+}
+
+/* Register helper @me: make sure the slab cache for conntracks with
+ * helper data is registered, then put @me on the global helper list.
+ *
+ * @me->timeout must be non-zero (enforced with BUG_ON).
+ * Returns 0 on success or the negative error from
+ * nf_conntrack_register_cache(). */
+int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
+{
+       int ret;
+       BUG_ON(me->timeout == 0);
+
+       /* The extra __alignof__ bytes leave room to align the helper
+        * area that follows struct nf_conn -- TODO confirm against
+        * init_conntrack_for_helper. */
+       ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
+                                         sizeof(struct nf_conn)
+                                         + sizeof(union nf_conntrack_help)
+                                         + __alignof__(union nf_conntrack_help),
+                                         init_conntrack_for_helper);
+       if (ret < 0) {
+               printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
+               return ret;
+       }
+       write_lock_bh(&nf_conntrack_lock);
+       list_prepend(&helpers, me);
+       write_unlock_bh(&nf_conntrack_lock);
+
+       return 0;
+}
+
+/* LIST_FIND_W callback: if the conntrack owning @i uses helper @me,
+ * emit an IPCT_HELPER event and clear its helper pointer.  Always
+ * returns 0, so the walk visits every entry. */
+static inline int unhelp(struct nf_conntrack_tuple_hash *i,
+                        const struct nf_conntrack_helper *me)
+{
+       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
+
+       if (ct->helper != me)
+               return 0;
+
+       nf_conntrack_event(IPCT_HELPER, ct);
+       ct->helper = NULL;
+       return 0;
+}
+
+/* Unregister helper @me: remove it from the helper list, kill the
+ * expectations whose master uses it, and clear the helper pointer of
+ * every conntrack (unconfirmed or hashed) that still refers to it. */
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+       struct nf_conntrack_expect *exp, *next;
+       unsigned int bucket;
+
+       /* Need write lock here, to delete helper. */
+       write_lock_bh(&nf_conntrack_lock);
+       LIST_DELETE(&helpers, me);
+
+       /* Get rid of expectations */
+       list_for_each_entry_safe(exp, next, &nf_conntrack_expect_list, list) {
+               if (exp->master->helper != me)
+                       continue;
+               if (!del_timer(&exp->timeout))
+                       continue;
+               nf_ct_unlink_expect(exp);
+               nf_conntrack_expect_put(exp);
+       }
+
+       /* Get rid of expecteds, set helpers to NULL. */
+       LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
+       for (bucket = 0; bucket < nf_conntrack_htable_size; bucket++)
+               LIST_FIND_W(&nf_conntrack_hash[bucket], unhelp,
+                           struct nf_conntrack_tuple_hash *, me);
+       write_unlock_bh(&nf_conntrack_lock);
+
+       /* Someone could be still looking at the helper in a bh. */
+       synchronize_net();
+}
+
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __nf_ct_refresh_acct(struct nf_conn *ct,
+                         enum ip_conntrack_info ctinfo,
+                         const struct sk_buff *skb,
+                         unsigned long extra_jiffies,
+                         int do_acct)
+{
+       int event = 0;
+
+       NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
+       NF_CT_ASSERT(skb);
+
+       write_lock_bh(&nf_conntrack_lock);
+
+       if (!nf_ct_is_confirmed(ct)) {
+               /* If not in hash table, timer will not be active yet:
+                  just record the relative timeout. */
+               ct->timeout.expires = extra_jiffies;
+               event = IPCT_REFRESH;
+       } else if (del_timer(&ct->timeout)) {
+               /* Need del_timer for race avoidance (may already be dying). */
+               ct->timeout.expires = jiffies + extra_jiffies;
+               add_timer(&ct->timeout);
+               event = IPCT_REFRESH;
+       }
+
+#ifdef CONFIG_NF_CT_ACCT
+       if (do_acct) {
+               const int dir = CTINFO2DIR(ctinfo);
+
+               ct->counters[dir].packets++;
+               ct->counters[dir].bytes +=
+                       skb->len - (unsigned int)(skb->nh.raw - skb->data);
+               /* Flag the event once either counter has its top bit set. */
+               if ((ct->counters[dir].packets & 0x80000000)
+                   || (ct->counters[dir].bytes & 0x80000000))
+                       event |= IPCT_COUNTER_FILLING;
+       }
+#endif
+
+       write_unlock_bh(&nf_conntrack_lock);
+
+       /* must be unlocked when calling event cache */
+       if (event)
+               nf_conntrack_event_cache(event, skb);
+}
+
+/* Used by ipt_REJECT and ip6t_REJECT.  Attaches the conntrack of @skb
+ * to @nskb as a RELATED packet in the opposite direction and takes a
+ * reference on it. */
+void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+       /* This ICMP is in reverse direction to the packet which caused it */
+       ctinfo = (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+               ? IP_CT_RELATED + IP_CT_IS_REPLY
+               : IP_CT_RELATED;
+
+       /* Attach to new skbuff, and increment count */
+       nskb->nfct = &ct->ct_general;
+       nskb->nfctinfo = ctinfo;
+       nf_conntrack_get(nskb->nfct);
+}
+
+/* List-walk adapter: apply the caller's @iter to the conntrack that
+ * owns hash entry @i and return its verdict. */
+static inline int
+do_iter(const struct nf_conntrack_tuple_hash *i,
+       int (*iter)(struct nf_conn *i, void *data),
+       void *data)
+{
+       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
+
+       return iter(ct, data);
+}
+
+/* Bring out ya dead!
+ * Resume scanning the hash table at *bucket for an entry selected by
+ * @iter, then fall back to the unconfirmed list.  A reference is taken
+ * on the match before the lock is dropped; returns NULL if nothing is
+ * selected. */
+static struct nf_conntrack_tuple_hash *
+get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
+               void *data, unsigned int *bucket)
+{
+       struct nf_conntrack_tuple_hash *corpse = NULL;
+
+       write_lock_bh(&nf_conntrack_lock);
+       while (*bucket < nf_conntrack_htable_size) {
+               corpse = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
+                                    struct nf_conntrack_tuple_hash *,
+                                    iter, data);
+               if (corpse != NULL)
+                       break;
+               /* Nothing here: move on to the next bucket. */
+               (*bucket)++;
+       }
+       if (corpse == NULL)
+               corpse = LIST_FIND_W(&unconfirmed, do_iter,
+                                    struct nf_conntrack_tuple_hash *,
+                                    iter, data);
+       if (corpse != NULL)
+               atomic_inc(&nf_ct_tuplehash_to_ctrack(corpse)->ct_general.use);
+       write_unlock_bh(&nf_conntrack_lock);
+
+       return corpse;
+}
+
+/* Kill every conntrack for which @iter(ct, @data) returns non-zero. */
+void
+nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
+{
+       unsigned int bucket = 0;
+
+       for (;;) {
+               struct nf_conntrack_tuple_hash *h;
+               struct nf_conn *ct;
+
+               h = get_next_corpse(iter, data, &bucket);
+               if (h == NULL)
+                       break;
+
+               ct = nf_ct_tuplehash_to_ctrack(h);
+               /* Time to push up daisies... */
+               if (del_timer(&ct->timeout))
+                       death_by_timeout((unsigned long)ct);
+               /* ... else the timer will get him soon. */
+
+               nf_ct_put(ct);
+       }
+}
+
+/* Iterator for nf_ct_iterate_cleanup() that selects every conntrack. */
+static int kill_all(struct nf_conn *i, void *data)
+{
+       /* Neither argument is examined: everything must go. */
+       return 1;
+}
+
+/* Release a table of @size list heads with the allocator that matches
+ * how it was obtained: vfree() when @vmalloced is set, free_pages()
+ * otherwise. */
+static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
+{
+       if (vmalloced) {
+               vfree(hash);
+               return;
+       }
+
+       free_pages((unsigned long)hash,
+                  get_order(sizeof(struct list_head) * size));
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void nf_conntrack_cleanup(void)
+{
+       int feat;
+
+       /* This makes sure all current packets have passed through
+          netfilter framework.  Roll on, two-stage module
+          delete... */
+       synchronize_net();
+
+       nf_ct_event_cache_flush();
+
+       /* I see dead people: keep reaping until the global conntrack
+          count drops to zero, yielding the CPU between passes. */
+       for (;;) {
+               nf_ct_iterate_cleanup(kill_all, NULL);
+               if (atomic_read(&nf_conntrack_count) == 0)
+                       break;
+               schedule();
+       }
+
+       for (feat = 0; feat < NF_CT_F_NUM; feat++) {
+               if (nf_ct_cache[feat].use != 0) {
+                       NF_CT_ASSERT(nf_ct_cache[feat].use == 1);
+                       /* Force the count to 1 before unregistering;
+                          presumably so that this call really tears
+                          the cache down -- TODO confirm. */
+                       nf_ct_cache[feat].use = 1;
+                       nf_conntrack_unregister_cache(feat);
+               }
+       }
+       kmem_cache_destroy(nf_conntrack_expect_cachep);
+       free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
+                           nf_conntrack_htable_size);
+}
+
+/* Allocate and initialise a table of @size list heads.  Whole pages are
+ * tried first; if that fails vmalloc() is used instead and *vmalloced
+ * is set to 1 (0 otherwise).  Returns NULL if both allocators fail. */
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+       struct list_head *table;
+       unsigned int n;
+
+       *vmalloced = 0;
+       table = (void *)__get_free_pages(GFP_KERNEL,
+                                        get_order(sizeof(struct list_head)
+                                                  * size));
+       if (table == NULL) {
+               *vmalloced = 1;
+               printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
+               table = vmalloc(sizeof(struct list_head) * size);
+               if (table == NULL)
+                       return NULL;
+       }
+
+       for (n = 0; n < size; n++)
+               INIT_LIST_HEAD(&table[n]);
+
+       return table;
+}
+
+/* Setter for the "hashsize" module parameter.
+ *
+ * Before the hash table exists (nf_conntrack_htable_size == 0) the
+ * value is simply stored through param_set_uint().  Afterwards a new
+ * table of the requested size is allocated, every hashed entry is moved
+ * into it under nf_conntrack_lock using a fresh random seed, and the
+ * old table is freed.
+ *
+ * Returns 0 on success, -EINVAL if @val does not parse to a positive
+ * bucket count, -ENOMEM if the new table cannot be allocated. */
+int set_hashsize(const char *val, struct kernel_param *kp)
+{
+       int i, bucket, hashsize, vmalloced;
+       int old_vmalloced, old_size;
+       int rnd;
+       struct list_head *hash, *old_hash;
+       struct nf_conntrack_tuple_hash *h;
+
+       /* On boot, we can set this without any fancy locking. */
+       if (!nf_conntrack_htable_size)
+               return param_set_uint(val, kp);
+
+       /* Reject zero and negative sizes: a negative count would be
+        * turned into a huge byte size by alloc_hashtable(). */
+       hashsize = simple_strtol(val, NULL, 0);
+       if (hashsize <= 0)
+               return -EINVAL;
+
+       hash = alloc_hashtable(hashsize, &vmalloced);
+       if (!hash)
+               return -ENOMEM;
+
+       /* We have to rehash for the new table anyway, so we also can
+        * use a new random seed */
+       get_random_bytes(&rnd, 4);
+
+       write_lock_bh(&nf_conntrack_lock);
+       for (i = 0; i < nf_conntrack_htable_size; i++) {
+               while (!list_empty(&nf_conntrack_hash[i])) {
+                       h = list_entry(nf_conntrack_hash[i].next,
+                                      struct nf_conntrack_tuple_hash, list);
+                       list_del(&h->list);
+                       bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+                       list_add_tail(&h->list, &hash[bucket]);
+               }
+       }
+       /* Remember the old table so it can be freed after unlocking. */
+       old_size = nf_conntrack_htable_size;
+       old_vmalloced = nf_conntrack_vmalloc;
+       old_hash = nf_conntrack_hash;
+
+       nf_conntrack_htable_size = hashsize;
+       nf_conntrack_vmalloc = vmalloced;
+       nf_conntrack_hash = hash;
+       nf_conntrack_hash_rnd = rnd;
+       write_unlock_bh(&nf_conntrack_lock);
+
+       free_conntrack_hash(old_hash, old_vmalloced, old_size);
+       return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+                 &nf_conntrack_htable_size, 0600);
+
+/* Module initialisation: size and allocate the hash table, register
+ * the basic conntrack slab cache, create the expectation cache, point
+ * every layer 3 protocol slot at the generic handler and set up the
+ * "untracked" fake conntrack.
+ *
+ * Returns the (non-negative) result of nf_conntrack_register_cache()
+ * on success and -ENOMEM on any failure. */
+int __init nf_conntrack_init(void)
+{
+       unsigned int i;
+       int ret;
+
+       /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
+        * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
+       if (!nf_conntrack_htable_size) {
+               nf_conntrack_htable_size
+                       = (((num_physpages << PAGE_SHIFT) / 16384)
+                          / sizeof(struct list_head));
+               if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+                       nf_conntrack_htable_size = 8192;
+               if (nf_conntrack_htable_size < 16)
+                       nf_conntrack_htable_size = 16;
+       }
+       /* Default limit: eight conntracks per hash bucket. */
+       nf_conntrack_max = 8 * nf_conntrack_htable_size;
+
+       printk("nf_conntrack version %s (%u buckets, %d max)\n",
+              NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
+              nf_conntrack_max);
+
+       nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
+                                           &nf_conntrack_vmalloc);
+       if (!nf_conntrack_hash) {
+               printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
+               goto err_out;
+       }
+
+       ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
+                                         sizeof(struct nf_conn), NULL);
+       if (ret < 0) {
+               printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+               goto err_free_hash;
+       }
+
+       nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+                                       sizeof(struct nf_conntrack_expect),
+                                       0, 0, NULL, NULL);
+       if (!nf_conntrack_expect_cachep) {
+               printk(KERN_ERR "Unable to create nf_expect slab cache\n");
+               goto err_free_conntrack_slab;
+       }
+
+       /* Don't NEED lock here, but good form anyway. */
+       write_lock_bh(&nf_conntrack_lock);
+        for (i = 0; i < PF_MAX; i++)
+               nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
+        write_unlock_bh(&nf_conntrack_lock);
+
+       /* Set up fake conntrack:
+           - to never be deleted, not in any hashes */
+       atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+       /*  - and make it look like a confirmed connection */
+       set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+
+       return ret;
+
+       /* Error unwinding: undo the steps above in reverse order.  Note
+        * that the specific error code in ret is not propagated. */
+err_free_conntrack_slab:
+       nf_conntrack_unregister_cache(NF_CT_F_BASIC);
+err_free_hash:
+       free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
+                           nf_conntrack_htable_size);
+err_out:
+       return -ENOMEM;
+}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
new file mode 100644 (file)
index 0000000..65080e2
--- /dev/null
@@ -0,0 +1,698 @@
+/* FTP extension for connection tracking. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - enable working with Layer 3 protocol independent connection tracking.
+ *     - track EPRT and EPSV commands with IPv6 address.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/ctype.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp connection tracking helper");
+
+/* Scratch buffer that help() hands to skb_header_pointer() as the copy
+ * target for the TCP payload.  init() allocates 65536 bytes for it and
+ * fini() frees it. */
+/* This is slow, but it's simple. --RR */
+static char *ftp_buffer;
+
+/* Held (spin_lock_bh) by help() for as long as it works on ftp_buffer. */
+static DEFINE_SPINLOCK(nf_ftp_lock);
+
+/* Control ports the helper is registered for (module parameter "ports").
+ * ports_c receives the number of entries supplied; init() falls back to
+ * FTP_PORT when none are given. */
+#define MAX_PORTS 8
+static u_int16_t ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+
+/* Module parameter: when zero, help() does not register an expectation for
+ * a data address that differs from the source address of the control
+ * connection. */
+static int loose;
+module_param(loose, int, 0600);
+
+/* Optional NAT callback.  When non-NULL, help() calls it in place of
+ * nf_conntrack_expect_related(), passing the offset and length of the
+ * matched address within the payload, the prepared expectation and a
+ * pointer to the sequence number that help() afterwards hands to
+ * update_nl_seq().  Its return value becomes the verdict for the packet. */
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+                               enum ip_conntrack_info ctinfo,
+                               enum ip_ct_ftp_type type,
+                               unsigned int matchoff,
+                               unsigned int matchlen,
+                               struct nf_conntrack_expect *exp,
+                               u32 *seq);
+EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
+
+/* Debug tracing: flip the "#if 0" to "#if 1" to map DEBUGP() onto printk();
+ * as written it expands to nothing. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Address/port parsers referenced by the search[] table; all share the
+ * getnum signature: (data, length, result, terminator) -> chars used or 0. */
+static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
+                            char);
+
+/* Keywords help() looks for at the start of a payload.  Only entries whose
+ * dir equals the direction of the packet are tried, in table order.
+ *
+ *   dir      direction the entry applies to
+ *   pattern  keyword, compared case-insensitively by find_pattern()
+ *   plen     length of pattern without the trailing NUL
+ *   skip     character that precedes the address/port text
+ *   term     terminator handed to the getnum parser
+ *   ftptype  command type passed on to nf_nat_ftp_hook
+ *   getnum   parser for the address/port text
+ */
+static struct ftp_search {
+       enum ip_conntrack_dir dir;
+       const char *pattern;
+       size_t plen;
+       char skip;
+       char term;
+       enum ip_ct_ftp_type ftptype;
+       int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
+} search[] = {
+       {
+               IP_CT_DIR_ORIGINAL,
+               "PORT", sizeof("PORT") - 1, ' ', '\r',
+               IP_CT_FTP_PORT,
+               try_rfc959,
+       },
+       {
+               IP_CT_DIR_REPLY,
+               "227 ", sizeof("227 ") - 1, '(', ')',
+               IP_CT_FTP_PASV,
+               try_rfc959,
+       },
+       {
+               IP_CT_DIR_ORIGINAL,
+               "EPRT", sizeof("EPRT") - 1, ' ', '\r',
+               IP_CT_FTP_EPRT,
+               try_eprt,
+       },
+       {
+               IP_CT_DIR_REPLY,
+               "229 ", sizeof("229 ") - 1, '(', ')',
+               IP_CT_FTP_EPSV,
+               try_epsv_response,
+       },
+};
+
+/*
+ * Parse a textual IPv6 address that is followed by the character 'term'.
+ * This code is based on inet_pton() in glibc-2.2.4, but unlike that
+ * function the input is bounded by 'dlen' and is not NUL-terminated.
+ *
+ *   src   start of the address text
+ *   dlen  number of bytes that may be read from src
+ *   dst   receives the 16 address bytes on success
+ *   term  character that must directly follow the address
+ *
+ * Returns the number of characters consumed (the terminator excluded),
+ * or 0 if the text is not a complete, valid address.
+ */
+static int
+get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
+{
+       static const char xdigits[] = "0123456789abcdef";
+       u_int8_t tmp[16], *tp, *endp, *colonp;
+       int ch, saw_xdigit;
+       u_int32_t val;
+       size_t clen = 0;
+
+       tp = memset(tmp, '\0', sizeof(tmp));
+       endp = tp + sizeof(tmp);
+       colonp = NULL;
+
+       /* Nothing to look at: do not touch *src. */
+       if (dlen == 0)
+               return 0;
+
+       /* Leading :: requires some special handling. */
+       if (*src == ':'){
+               /* The second ':' must lie inside the data as well. */
+               if (dlen < 2 || *++src != ':') {
+                       DEBUGP("invalid \":\" at the head of addr\n");
+                       return 0;
+               }
+               clen++;
+       }
+
+       saw_xdigit = 0;
+       val = 0;
+       while ((clen < dlen) && (*src != term)) {
+               const char *pch;
+
+               ch = tolower(*src++);
+               clen++;
+
+               /* strchr() also matches the NUL that ends xdigits, so a NUL
+                  byte in the payload would otherwise pass as a hex digit
+                  with value 16.  Treat it as an invalid character. */
+               pch = (ch != '\0') ? strchr(xdigits, ch) : NULL;
+               if (pch != NULL) {
+                       val <<= 4;
+                       val |= (pch - xdigits);
+                       if (val > 0xffff)
+                               return 0;
+
+                       saw_xdigit = 1;
+                       continue;
+               }
+               if (ch != ':') {
+                       DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
+                       return 0;
+               }
+
+               if (!saw_xdigit) {
+                       if (colonp) {
+                               DEBUGP("invalid location of \"::\".\n");
+                               return 0;
+                       }
+                       colonp = tp;
+                       continue;
+               } else if (clen >= dlen || *src == term) {
+                       /* A ':' directly before the terminator or the end
+                          of the data: the address is cut short. */
+                       DEBUGP("trancated IPv6 addr\n");
+                       return 0;
+               }
+
+               if (tp + 2 > endp)
+                       return 0;
+               *tp++ = (u_int8_t) (val >> 8) & 0xff;
+               *tp++ = (u_int8_t) val & 0xff;
+
+               saw_xdigit = 0;
+               val = 0;
+               continue;
+       }
+       /* Store the last 16-bit group. */
+       if (saw_xdigit) {
+               if (tp + 2 > endp)
+                       return 0;
+               *tp++ = (u_int8_t) (val >> 8) & 0xff;
+               *tp++ = (u_int8_t) val & 0xff;
+       }
+       if (colonp != NULL) {
+               /*
+                * Since some memmove()'s erroneously fail to handle
+                * overlapping regions, we'll do the shift by hand.
+                */
+               const int n = tp - colonp;
+               int i;
+
+               if (tp == endp)
+                       return 0;
+
+               for (i = 1; i <= n; i++) {
+                       endp[- i] = colonp[n - i];
+                       colonp[n - i] = 0;
+               }
+               tp = endp;
+       }
+       /* All 16 bytes must be filled and the terminator must be present
+          inside the data; never read the byte behind the data. */
+       if (tp != endp || clen >= dlen || (*src != term))
+               return 0;
+
+       memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
+       return clen;
+}
+
+/*
+ * Read 'array_size' decimal numbers separated by 'sep' and followed by
+ * 'term' from at most 'dlen' bytes of 'data' into 'array'.
+ * Returns the offset of the terminator, or 0 if the text does not have
+ * that shape.
+ */
+static int try_number(const char *data, size_t dlen, u_int32_t array[],
+                      int array_size, char sep, char term)
+{
+       u_int32_t idx = 0, pos = 0;
+
+       memset(array, 0, sizeof(array[0])*array_size);
+
+       while (pos < dlen && idx < array_size) {
+               const char c = data[pos];
+
+               if (c >= '0' && c <= '9') {
+                       array[idx] = array[idx]*10 + c - '0';
+               } else if (c == sep) {
+                       idx++;
+               } else if (c == term && idx == array_size - 1) {
+                       /* Terminator right after the last number: done. */
+                       return pos;
+               } else {
+                       DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
+                              pos, idx, c);
+                       return 0;
+               }
+               pos++;
+       }
+       DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
+
+       return 0;
+}
+
+/*
+ * Parse the six comma separated numbers of a PORT command or 227 reply,
+ * e.g. 192,168,1,1,5,6, into cmd->u3.ip and cmd->u.tcp.port.
+ * Returns 0, or the length of the numbers.
+ */
+static int try_rfc959(const char *data, size_t dlen,
+                     struct nf_conntrack_man *cmd, char term)
+{
+       u_int32_t field[6];
+       int consumed = try_number(data, dlen, field, 6, ',', term);
+
+       if (!consumed)
+               return 0;
+
+       /* field[0..3] are the address bytes, field[4..5] the port bytes */
+       cmd->u.tcp.port = htons((field[4] << 8) | field[5]);
+       cmd->u3.ip = htonl((field[0] << 24) | (field[1] << 16) |
+                          (field[2] << 8) | field[3]);
+       return consumed;
+}
+
+/*
+ * Grab port: decimal number starting at data[start], up to 'delim'.
+ *
+ *   data   command text
+ *   start  offset of the first digit
+ *   dlen   number of valid bytes in data
+ *   delim  character that ends the number
+ *   port   receives the port in network byte order on success
+ *
+ * Returns the offset just behind the delimiter, or 0 if the number is
+ * missing, zero, larger than 65535, contains a non-digit, or is not
+ * terminated by 'delim' within the data.
+ */
+static int get_port(const char *data, int start, size_t dlen, char delim,
+                   u_int16_t *port)
+{
+       /* Wider than the result so that values above 65535 can be detected
+          instead of silently wrapping to a different port. */
+       u_int32_t tmp_port = 0;
+       int i;
+
+       for (i = start; i < dlen; i++) {
+               /* Finished? */
+               if (data[i] == delim) {
+                       if (tmp_port == 0)
+                               break;
+                       *port = htons(tmp_port);
+                       DEBUGP("get_port: return %d\n", tmp_port);
+                       return i + 1;
+               }
+               else if (data[i] >= '0' && data[i] <= '9') {
+                       tmp_port = tmp_port*10 + data[i] - '0';
+                       if (tmp_port > 0xFFFF) {
+                               DEBUGP("get_port: port out of range.\n");
+                               break;
+                       }
+               }
+               else { /* Some other crap */
+                       DEBUGP("get_port: invalid char.\n");
+                       break;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Parse the argument of an EPRT command: |1|132.235.1.2|6275| or
+ * |2|3ffe::1|6275|.  The address goes to cmd->u3, the port to
+ * cmd->u.tcp.port.  Returns 0, or the length of the argument.
+ */
+static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
+                   char term)
+{
+       char delim;
+       int length;
+
+       /* Layout: delimiter, "1" (IPv4) or "2" (IPv6), delimiter, and at
+          least one more character. */
+       if (dlen <= 3) {
+               DEBUGP("EPRT: too short\n");
+               return 0;
+       }
+
+       delim = data[0];
+       if (isdigit(delim) || delim < 33 || delim > 126) {
+               DEBUGP("try_eprt: invalid delimitter.\n");
+               return 0;
+       }
+       if (data[2] != delim) {
+               DEBUGP("try_eprt: invalid delimitter.\n");
+               return 0;
+       }
+
+       /* The announced family has to match the family of the connection. */
+       if (cmd->l3num == PF_INET && data[1] != '1') {
+               DEBUGP("EPRT: invalid protocol number.\n");
+               return 0;
+       }
+       if (cmd->l3num == PF_INET6 && data[1] != '2') {
+               DEBUGP("EPRT: invalid protocol number.\n");
+               return 0;
+       }
+
+       DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim);
+
+       if (data[1] != '1') {
+               /* IPv6 address follows. */
+               length = get_ipv6_addr(data + 3, dlen - 3,
+                                      (struct in6_addr *)cmd->u3.ip6, delim);
+       } else {
+               u_int32_t quad[4];
+
+               /* Dotted IPv4 address follows. */
+               length = try_number(data + 3, dlen - 3, quad, 4, '.', delim);
+               if (length != 0)
+                       cmd->u3.ip = htonl((quad[0] << 24) | (quad[1] << 16)
+                                          | (quad[2] << 8) | quad[3]);
+       }
+
+       if (!length)
+               return 0;
+       DEBUGP("EPRT: Got IP address!\n");
+       /* Start offset includes initial "|1|", and trailing delimiter */
+       return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
+}
+
+/*
+ * Parse the port out of a 229 (EPSV) reply of the form |||6446| into
+ * cmd->u.tcp.port.  Returns 0, or the length of the numbers.
+ */
+static int try_epsv_response(const char *data, size_t dlen,
+                            struct nf_conntrack_man *cmd, char term)
+{
+       char delim;
+
+       /* Three delimiters plus at least one more character. */
+       if (dlen <= 3)
+               return 0;
+
+       delim = data[0];
+       if (isdigit(delim))
+               return 0;
+       if (delim < 33 || delim > 126)
+               return 0;
+       if (data[1] != delim || data[2] != delim)
+               return 0;
+
+       return get_port(data, 3, dlen, delim, &cmd->u.tcp.port);
+}
+
+/*
+ * Check whether 'data' starts with 'pattern' (case-insensitively), skip
+ * ahead to the 'skip' character and let 'getnum' parse what follows.
+ * On a match *numoff and *numlen describe the parsed text within data.
+ *
+ * Returns 1 for a match, 0 if the data does not start with the pattern,
+ * and -1 if it does (possibly only as a prefix) but cannot be parsed
+ * completely.
+ */
+static int find_pattern(const char *data, size_t dlen,
+                       const char *pattern, size_t plen,
+                       char skip, char term,
+                       unsigned int *numoff,
+                       unsigned int *numlen,
+                       struct nf_conntrack_man *cmd,
+                       int (*getnum)(const char *, size_t,
+                                     struct nf_conntrack_man *, char))
+{
+       size_t i;
+
+       DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
+       if (dlen == 0)
+               return 0;
+
+       /* Short packet: it can at best be a prefix of the pattern. */
+       if (dlen <= plen)
+               return (strnicmp(data, pattern, dlen) == 0) ? -1 : 0;
+
+       if (strnicmp(data, pattern, plen) != 0) {
+#if 0
+               size_t i;
+
+               DEBUGP("ftp: string mismatch\n");
+               for (i = 0; i < plen; i++) {
+                       DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
+                               i, data[i], data[i],
+                               pattern[i], pattern[i]);
+               }
+#endif
+               return 0;
+       }
+
+       DEBUGP("Pattern matches!\n");
+       /* The keyword is there; walk forward to the 'skip' character and
+          give up if the data ends first. */
+       i = plen;
+       while (data[i] != skip) {
+               if (i == dlen - 1)
+                       return -1;
+               i++;
+       }
+
+       /* Step over the 'skip' character itself */
+       i++;
+
+       DEBUGP("Skipped up to `%c'!\n", skip);
+
+       *numoff = i;
+       *numlen = getnum(data + i, dlen - i, cmd, term);
+       if (*numlen == 0)
+               return -1;
+
+       DEBUGP("Match succeeded!\n");
+       return 1;
+}
+
+/*
+ * Return 1 if 'seq' is one of the sequence numbers remembered for
+ * direction 'dir' (positions just after a \n), 0 otherwise.
+ */
+static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
+{
+       unsigned int slot = 0;
+
+       while (slot < info->seq_aft_nl_num[dir]) {
+               if (seq == info->seq_aft_nl[dir][slot])
+                       return 1;
+               slot++;
+       }
+       return 0;
+}
+
+/*
+ * Remember 'nl_seq' as a sequence number that directly follows a newline
+ * in direction 'dir'.
+ *
+ * Nothing happens if the value is already stored.  While there is a free
+ * slot the value is appended.  Once all NUM_SEQ_TO_REMEMBER slots are in
+ * use, the oldest stored sequence number is replaced, but only if nl_seq
+ * is newer than it: we don't update if it's older than what we have.
+ * Every modification is reported via nf_conntrack_event_cache().
+ */
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+                         struct sk_buff *skb)
+{
+       unsigned int i, oldest = 0;
+
+       /* Look for oldest: if we find exact match, we're done. */
+       for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
+               if (info->seq_aft_nl[dir][i] == nl_seq)
+                       return;
+
+               /* Compare sequence numbers with each other (wrap-safe via
+                  before()), not a sequence number with a slot index. */
+               if (before(info->seq_aft_nl[dir][i],
+                          info->seq_aft_nl[dir][oldest]))
+                       oldest = i;
+       }
+
+       if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
+               info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+               nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+       } else if (info->seq_aft_nl_num[dir] > 0 &&
+                  before(info->seq_aft_nl[dir][oldest], nl_seq)) {
+               info->seq_aft_nl[dir][oldest] = nl_seq;
+               nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+       }
+}
+
+/*
+ * Conntrack helper callback for FTP control connections.
+ *
+ * Looks at the TCP payload of an established connection and, if the
+ * payload starts right after a remembered newline and begins with one of
+ * the keywords in search[], sets up an expectation for the announced data
+ * connection.
+ *
+ *   pskb     packet being inspected
+ *   protoff  offset of the TCP header within the packet
+ *   ct       conntrack entry of the control connection
+ *   ctinfo   state/direction of the packet relative to ct
+ *
+ * Returns NF_ACCEPT or NF_DROP, or whatever nf_nat_ftp_hook returned when
+ * that hook is set.  NF_DROP is used for a partially matching command, when
+ * no expectation can be allocated, and when registering it fails.
+ */
+static int help(struct sk_buff **pskb,
+               unsigned int protoff,
+               struct nf_conn *ct,
+               enum ip_conntrack_info ctinfo)
+{
+       unsigned int dataoff, datalen;
+       struct tcphdr _tcph, *th;
+       char *fb_ptr;
+       int ret;
+       u32 seq;
+       int dir = CTINFO2DIR(ctinfo);
+       unsigned int matchlen, matchoff;
+       struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info;
+       struct nf_conntrack_expect *exp;
+       struct nf_conntrack_man cmd = {};
+
+       unsigned int i;
+       int found = 0, ends_in_nl;
+
+       /* Until there's been traffic both ways, don't look in packets. */
+       if (ctinfo != IP_CT_ESTABLISHED
+           && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+               DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
+               return NF_ACCEPT;
+       }
+
+       th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+       if (th == NULL)
+               return NF_ACCEPT;
+
+       dataoff = protoff + th->doff * 4;
+       /* No data? */
+       if (dataoff >= (*pskb)->len) {
+               DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
+                       (*pskb)->len);
+               return NF_ACCEPT;
+       }
+       datalen = (*pskb)->len - dataoff;
+
+       /* ftp_buffer is shared module state, so everything that reads the
+          payload through fb_ptr runs under nf_ftp_lock. */
+       spin_lock_bh(&nf_ftp_lock);
+       fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
+       BUG_ON(fb_ptr == NULL);
+
+       ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+       /* Sequence number just behind this segment's payload. */
+       seq = ntohl(th->seq) + datalen;
+
+       /* Look up to see if we're just after a \n. */
+       if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+               /* Now if this ends in \n, update ftp info. */
+               DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
+                      ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
+                      ct_ftp_info->seq_aft_nl[dir][0],
+                      ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
+                      ct_ftp_info->seq_aft_nl[dir][1]);
+               ret = NF_ACCEPT;
+               goto out_update_nl;
+       }
+
+        /* Initialize IP/IPv6 addr to expected address (it's not mentioned
+           in EPSV responses) */
+       cmd.l3num = ct->tuplehash[dir].tuple.src.l3num;
+       memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+              sizeof(cmd.u3.all));
+
+       /* Try the table entries for this direction until one of them
+          reports a match (1) or a partial match (-1); i then indexes
+          that entry. */
+       for (i = 0; i < ARRAY_SIZE(search); i++) {
+               if (search[i].dir != dir) continue;
+
+               found = find_pattern(fb_ptr, datalen,
+                                    search[i].pattern,
+                                    search[i].plen,
+                                    search[i].skip,
+                                    search[i].term,
+                                    &matchoff, &matchlen,
+                                    &cmd,
+                                    search[i].getnum);
+               if (found) break;
+       }
+       if (found == -1) {
+               /* We don't usually drop packets.  After all, this is
+                  connection tracking, not packet filtering.
+                  However, it is necessary for accurate tracking in
+                  this case. */
+               if (net_ratelimit())
+                       printk("conntrack_ftp: partial %s %u+%u\n",
+                              search[i].pattern,
+                              ntohl(th->seq), datalen);
+               ret = NF_DROP;
+               goto out;
+       } else if (found == 0) { /* No match */
+               ret = NF_ACCEPT;
+               goto out_update_nl;
+       }
+
+       DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
+              (int)matchlen, fb_ptr + matchoff,
+              matchlen, ntohl(th->seq) + matchoff);
+
+       exp = nf_conntrack_expect_alloc(ct);
+       if (exp == NULL) {
+               ret = NF_DROP;
+               goto out;
+       }
+
+       /* We refer to the reverse direction ("!dir") tuples here,
+        * because we're expecting something in the other direction.
+        * Doesn't matter unless NAT is happening.  */
+       exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3;
+
+       /* Update the ftp info */
+       /* memcmp() != 0: the announced address differs from the source
+          address of the control connection. */
+       if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) &&
+           memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+                    sizeof(cmd.u3.all))) {
+               /* Enrico Scholz's passive FTP to partially RNAT'd ftp
+                   server: it really wants us to connect to a
+                   different IP address.  Simply don't record it for
+                   NAT. */
+               if (cmd.l3num == PF_INET) {
+                       DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
+                              NIPQUAD(cmd.u3.ip),
+                              NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
+               } else {
+                       DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n",
+                              NIP6(*((struct in6_addr *)cmd.u3.ip6)),
+                              NIP6(*((struct in6_addr *)ct->tuplehash[dir]
+                                                       .tuple.src.u3.ip6)));
+               }
+
+               /* Thanks to Cristiano Lincoln Mattos
+                  <lincoln@cesar.org.br> for reporting this potential
+                  problem (DMZ machines opening holes to internal
+                  networks, or the packet filter itself). */
+               if (!loose) {
+                       ret = NF_ACCEPT;
+                       goto out_put_expect;
+               }
+               memcpy(&exp->tuple.dst.u3, &cmd.u3.all,
+                      sizeof(exp->tuple.dst.u3));
+       }
+
+       /* Any source port, but exactly the announced destination port. */
+       exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3;
+       exp->tuple.src.l3num = cmd.l3num;
+       exp->tuple.src.u.tcp.port = 0;
+       exp->tuple.dst.u.tcp.port = cmd.u.tcp.port;
+       exp->tuple.dst.protonum = IPPROTO_TCP;
+
+       exp->mask = (struct nf_conntrack_tuple)
+                   { .src = { .l3num = 0xFFFF,
+                              .u = { .tcp = { 0 }},
+                            },
+                     .dst = { .protonum = 0xFF,
+                              .u = { .tcp = { 0xFFFF }},
+                            },
+                   };
+       if (cmd.l3num == PF_INET) {
+               exp->mask.src.u3.ip = 0xFFFFFFFF;
+               exp->mask.dst.u3.ip = 0xFFFFFFFF;
+       } else {
+               /* NOTE(review): the second memset sizes the dst mask with
+                  sizeof(...src.u3.ip6); presumably both members have the
+                  same size -- confirm. */
+               memset(exp->mask.src.u3.ip6, 0xFF,
+                      sizeof(exp->mask.src.u3.ip6));
+               memset(exp->mask.dst.u3.ip6, 0xFF,
+                      sizeof(exp->mask.src.u3.ip6));
+       }
+
+       exp->expectfn = NULL;
+       exp->flags = 0;
+
+       /* Now, NAT might want to mangle the packet, and register the
+        * (possibly changed) expectation itself. */
+       if (nf_nat_ftp_hook)
+               ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+                                     matchoff, matchlen, exp, &seq);
+       else {
+               /* Can't expect this?  Best to drop packet now. */
+               if (nf_conntrack_expect_related(exp) != 0)
+                       ret = NF_DROP;
+               else
+                       ret = NF_ACCEPT;
+       }
+
+out_put_expect:
+       nf_conntrack_expect_put(exp);
+
+out_update_nl:
+       /* Now if this ends in \n, update ftp info.  Seq may have been
+        * adjusted by NAT code. */
+       if (ends_in_nl)
+               update_nl_seq(seq, ct_ftp_info, dir, *pskb);
+ out:
+       spin_unlock_bh(&nf_ftp_lock);
+       return ret;
+}
+
+/* One helper per configured port and address family: init() uses index 0
+ * for PF_INET and index 1 for PF_INET6.  ftp_names holds the matching
+ * helper names, "ftp" or "ftp-<port>". */
+static struct nf_conntrack_helper ftp[MAX_PORTS][2];
+static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")];
+
+/*
+ * Unregister every helper whose .me field is set and free ftp_buffer.
+ * Must not be marked __exit: init() calls it on its error path.
+ */
+static void fini(void)
+{
+       int port_idx, fam_idx;
+
+       for (port_idx = 0; port_idx < ports_c; port_idx++) {
+               for (fam_idx = 0; fam_idx < 2; fam_idx++) {
+                       struct nf_conntrack_helper *h = &ftp[port_idx][fam_idx];
+
+                       if (h->me != NULL) {
+                               DEBUGP("nf_ct_ftp: unregistering helper for pf: %d "
+                                      "port: %d\n",
+                                       h->tuple.src.l3num, ports[port_idx]);
+                               nf_conntrack_helper_unregister(h);
+                       }
+               }
+       }
+
+       kfree(ftp_buffer);
+}
+
+/*
+ * Module initialisation: allocate the shared payload buffer and register
+ * one helper per configured port for each of PF_INET and PF_INET6.
+ * Without a "ports" parameter only FTP_PORT is used.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * error from nf_conntrack_helper_register(); in the latter case everything
+ * registered so far is torn down again through fini().
+ */
+static int __init init(void)
+{
+       int i, j, ret;
+       char *tmpname;
+
+       ftp_buffer = kmalloc(65536, GFP_KERNEL);
+       if (!ftp_buffer)
+               return -ENOMEM;
+
+       if (ports_c == 0)
+               ports[ports_c++] = FTP_PORT;
+
+       /* FIXME should be configurable whether IPv4 and IPv6 FTP connections
+                are tracked or not - YK */
+       for (i = 0; i < ports_c; i++) {
+               /* Clear both helpers of this port, not only the first. */
+               memset(&ftp[i], 0, sizeof(ftp[i]));
+
+               ftp[i][0].tuple.src.l3num = PF_INET;
+               ftp[i][1].tuple.src.l3num = PF_INET6;
+               for (j = 0; j < 2; j++) {
+                       ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
+                       ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
+                       ftp[i][j].mask.src.u.tcp.port = 0xFFFF;
+                       ftp[i][j].mask.dst.protonum = 0xFF;
+                       ftp[i][j].max_expected = 1;
+                       ftp[i][j].timeout = 5 * 60;     /* 5 Minutes */
+                       ftp[i][j].me = THIS_MODULE;
+                       ftp[i][j].help = help;
+                       tmpname = &ftp_names[i][j][0];
+                       if (ports[i] == FTP_PORT)
+                               sprintf(tmpname, "ftp");
+                       else
+                               sprintf(tmpname, "ftp-%d", ports[i]);
+                       ftp[i][j].name = tmpname;
+
+                       DEBUGP("nf_ct_ftp: registering helper for pf: %d "
+                              "port: %d\n",
+                               ftp[i][j].tuple.src.l3num, ports[i]);
+                       ret = nf_conntrack_helper_register(&ftp[i][j]);
+                       if (ret) {
+                               printk("nf_ct_ftp: failed to register helper "
+                                      " for pf: %d port: %d\n",
+                                       ftp[i][j].tuple.src.l3num, ports[i]);
+                               /* This helper was not registered.  fini()
+                                  unregisters every entry whose .me is set,
+                                  so clear it to keep fini() from
+                                  unregistering something that never was
+                                  registered. */
+                               ftp[i][j].me = NULL;
+                               fini();
+                               return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
new file mode 100644 (file)
index 0000000..7de4f06
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * Based largely upon the original ip_conntrack code which
+ * had the following copyright information:
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+
+/* Debug tracing: flip the "#if 0" to "#if 1" to map DEBUGP() onto printk();
+ * as written it expands to nothing. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Per-CPU statistics; only declared here, the definition is elsewhere. */
+DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
+
+/* Generic layer 3: no addresses are extracted, both address fields of the
+ * tuple are simply zeroed.  Always returns 1. */
+static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+                               struct nf_conntrack_tuple *tuple)
+{
+       memset(&tuple->dst.u3, 0, sizeof tuple->dst.u3);
+       memset(&tuple->src.u3, 0, sizeof tuple->src.u3);
+
+       return 1;
+}
+
+/* Generic layer 3: the inverse tuple gets zeroed address fields, 'orig' is
+ * not consulted.  Always returns 1. */
+static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
+                               const struct nf_conntrack_tuple *orig)
+{
+       memset(&tuple->dst.u3, 0, sizeof tuple->dst.u3);
+       memset(&tuple->src.u3, 0, sizeof tuple->src.u3);
+
+       return 1;
+}
+
+/* Generic layer 3: prints nothing for the tuple and returns 0. */
+static int generic_print_tuple(struct seq_file *s,
+                              const struct nf_conntrack_tuple *tuple)
+{
+       return 0;
+}
+
+/* Generic layer 3: prints nothing for the conntrack entry and returns 0. */
+static int generic_print_conntrack(struct seq_file *s,
+                                  const struct nf_conn *conntrack)
+{
+       return 0;
+}
+
+/* Generic layer 3: never track such packets.  Leaves *dataoff and
+ * *protonum untouched and returns -NF_ACCEPT. */
+static int generic_prepare(struct sk_buff **pskb, unsigned int hooknum,
+                          unsigned int *dataoff, u_int8_t *protonum)
+{
+       return -NF_ACCEPT;
+}
+
+
+/* Generic layer 3: every tuple only needs the basic conntrack features. */
+static u_int32_t
+generic_get_features(const struct nf_conntrack_tuple *tuple)
+{
+       return NF_CT_F_BASIC;
+}
+
+/* Layer 3 protocol descriptor built from the generic_* callbacks above:
+ * tuples carry zeroed addresses, nothing is printed, and prepare refuses
+ * to track.  The conntrack core initialises every nf_ct_l3protos[] slot
+ * with a pointer to this object. */
+struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = {
+       .l3proto         = PF_UNSPEC,
+       .name            = "unknown",
+       .pkt_to_tuple    = generic_pkt_to_tuple,
+       .invert_tuple    = generic_invert_tuple,
+       .print_tuple     = generic_print_tuple,
+       .print_conntrack = generic_print_conntrack,
+       .prepare         = generic_prepare,
+       .get_features    = generic_get_features,
+       .me              = THIS_MODULE,
+};
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
new file mode 100644 (file)
index 0000000..36425f6
--- /dev/null
@@ -0,0 +1,85 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - enable working with L3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+
+/* Timeout that packet() passes to nf_ct_refresh_acct(): 600*HZ jiffies. */
+unsigned long nf_ct_generic_timeout = 600*HZ;
+
+/* Generic layer 4: there is no per-protocol data to extract, so both
+ * protocol parts of the tuple are set to zero.  Always returns 1. */
+static int generic_pkt_to_tuple(const struct sk_buff *skb,
+                               unsigned int dataoff,
+                               struct nf_conntrack_tuple *tuple)
+{
+       tuple->dst.u.all = 0;
+       tuple->src.u.all = 0;
+
+       return 1;
+}
+
+/* Generic layer 4: the inverse tuple gets zeroed protocol parts, 'orig' is
+ * not consulted.  Always returns 1. */
+static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
+                               const struct nf_conntrack_tuple *orig)
+{
+       tuple->dst.u.all = 0;
+       tuple->src.u.all = 0;
+
+       return 1;
+}
+
+/* Would print the per-protocol part of the tuple; the generic protocol has
+ * none, so nothing is written and 0 is returned. */
+static int
+generic_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple)
+{
+       return 0;
+}
+
+/* Would print the private part of the conntrack; the generic protocol has
+ * none, so nothing is written and 0 is returned. */
+static int
+generic_print_conntrack(struct seq_file *s, const struct nf_conn *state)
+{
+       return 0;
+}
+
+/* Per-packet handler: refreshes the conntrack with nf_ct_generic_timeout
+ * and accepts the packet.  Returns verdict for packet, or -1 for invalid;
+ * this implementation always returns NF_ACCEPT. */
+static int packet(struct nf_conn *conntrack,
+                 const struct sk_buff *skb,
+                 unsigned int dataoff,
+                 enum ip_conntrack_info ctinfo,
+                 int pf,
+                 unsigned int hooknum)
+{
+       const unsigned long timeout = nf_ct_generic_timeout;
+
+       nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol is found.  There is
+ * nothing to set up for the generic protocol; always returns 1. */
+static int
+new(struct nf_conn *conntrack, const struct sk_buff *skb, unsigned int dataoff)
+{
+       return 1;
+}
+
+/* Layer 4 protocol descriptor built from the callbacks above: tuples carry
+ * no per-protocol data, nothing is printed, and every packet refreshes the
+ * connection with nf_ct_generic_timeout. */
+struct nf_conntrack_protocol nf_conntrack_generic_protocol =
+{
+       .l3proto                = PF_UNSPEC,
+       .proto                  = 0,
+       .name                   = "unknown",
+       .pkt_to_tuple           = generic_pkt_to_tuple,
+       .invert_tuple           = generic_invert_tuple,
+       .print_tuple            = generic_print_tuple,
+       .print_conntrack        = generic_print_conntrack,
+       .packet                 = packet,
+       .new                    = new,
+};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
new file mode 100644 (file)
index 0000000..3a600f7
--- /dev/null
@@ -0,0 +1,670 @@
+/*
+ * Connection tracking protocol helper module for SCTP.
+ * 
+ * SCTP is defined in RFC 2960. References to various sections in this code 
+ * are to this RFC.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - enable working with L3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/ip_conntrack_sctp.c
+ */
+
+/*
+ * Added support for proc manipulation of timeouts.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+
+#if 0
+#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.sctp */
+static DEFINE_RWLOCK(sctp_lock);
+
+/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+   closely.  They're more complex. --RR 
+
+   And so for me for SCTP :D -Kiran */
+
+/* Printable names of the SCTP conntrack states.  The array is indexed
+ * by the state value (see sctp_print_conntrack), in the same order as
+ * the per-state comments in sctp_timeouts. */
+static const char *sctp_conntrack_names[] = {
+       "NONE",
+       "CLOSED",
+       "COOKIE_WAIT",
+       "COOKIE_ECHOED",
+       "ESTABLISHED",
+       "SHUTDOWN_SENT",
+       "SHUTDOWN_RECD",
+       "SHUTDOWN_ACK_SENT",
+};
+
+/* Unit suffixes for writing timeouts in jiffies: "3 SECS" expands to
+ * "3 * HZ".  Each macro expands to a bare "* ..." tail, so it is only
+ * meaningful directly after a numeric literal. */
+#define SECS  * HZ
+#define MINS  * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS  * 24 HOURS
+
+/* Per-state timeouts, in jiffies ("300 SECS / 1000" is 300 * HZ / 1000).
+ * These are unsigned int rather than unsigned long because the sysctl
+ * entries exporting them declare .maxlen = sizeof(unsigned int) and use
+ * proc_dointvec_jiffies; with unsigned long storage only half of each
+ * variable would be accessed on 64-bit machines, and on big-endian it
+ * would be the wrong half. */
+static unsigned int nf_ct_sctp_timeout_closed            =  10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait       =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed     =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_established       =   5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+
+/* Timeout to apply for each state, indexed by the state value.  There
+ * is no timeout for SCTP_CONNTRACK_NONE (NULL entry) and no entry at
+ * all for SCTP_CONNTRACK_MAX. */
+static unsigned int * sctp_timeouts[]
+= { NULL,                                  /* SCTP_CONNTRACK_NONE  */
+    &nf_ct_sctp_timeout_closed,                   /* SCTP_CONNTRACK_CLOSED */
+    &nf_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
+    &nf_ct_sctp_timeout_cookie_echoed,     /* SCTP_CONNTRACK_COOKIE_ECHOED */
+    &nf_ct_sctp_timeout_established,       /* SCTP_CONNTRACK_ESTABLISHED */
+    &nf_ct_sctp_timeout_shutdown_sent,     /* SCTP_CONNTRACK_SHUTDOWN_SENT */
+    &nf_ct_sctp_timeout_shutdown_recd,     /* SCTP_CONNTRACK_SHUTDOWN_RECD */
+    &nf_ct_sctp_timeout_shutdown_ack_sent  /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
+ };
+
+/* Short aliases for the conntrack states, used as the entries and
+ * column labels of the sctp_conntracks transition table.  sIV
+ * (SCTP_CONNTRACK_MAX) is the value sctp_packet and sctp_new treat as
+ * an invalid transition. */
+#define sNO SCTP_CONNTRACK_NONE
+#define        sCL SCTP_CONNTRACK_CLOSED
+#define        sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define        sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define        sES SCTP_CONNTRACK_ESTABLISHED
+#define        sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define        sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define        sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define        sIV SCTP_CONNTRACK_MAX
+
+/* 
+       These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an 
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end 
+point. Please note the subtleties. -Kiran
+
+NONE              - Nothing so far.
+COOKIE WAIT       - We have seen an INIT chunk in the original direction, or also 
+                    an INIT_ACK chunk in the reply direction.
+COOKIE ECHOED     - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED       - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT     - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD     - We have seen a SHUTDOWN chunk in the reply direction.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+                    to that of the SHUTDOWN chunk.
+CLOSED            - We have seen a SHUTDOWN_COMPLETE chunk in the direction of 
+                    the SHUTDOWN chunk. Connection is closed.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction. 
+ This messes things when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from 
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Multi Homing support.
+*/
+
+/* SCTP conntrack state transitions.
+ * Indexed as sctp_conntracks[direction][chunk row][current state]; the
+ * value is the next state, or sIV for an invalid transition.  The
+ * chunk row numbers (init = 0 ... shutdown_comp = 8) are the ones
+ * new_state() assigns to each chunk type. */
+static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+       {
+/*     ORIGINAL        */
+/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init         */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
+/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie */
+/* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
+/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+       },
+       {
+/*     REPLY   */
+/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init         */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
+/* init_ack     */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort        */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
+/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
+/* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+       }
+};
+
+/* Fill in the SCTP ports of tuple from the SCTP common header located
+ * at dataoff in skb.  Returns 1 on success, 0 when the header bytes
+ * cannot be obtained from the skb. */
+static int sctp_pkt_to_tuple(const struct sk_buff *skb,
+                            unsigned int dataoff,
+                            struct nf_conntrack_tuple *tuple)
+{
+       sctp_sctphdr_t hdr_buf, *sctph;
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       /* Only the first 8 bytes of the header are fetched. */
+       sctph = skb_header_pointer(skb, dataoff, 8, &hdr_buf);
+       if (!sctph)
+               return 0;
+
+       tuple->dst.u.sctp.port = sctph->dest;
+       tuple->src.u.sctp.port = sctph->source;
+       return 1;
+}
+
+/* Build the reply-direction ports in tuple by swapping the source and
+ * destination SCTP ports of orig.  Always returns 1.
+ * The two assignments are done in this order with no temporary, so the
+ * result is only a true swap when tuple and orig are distinct objects. */
+static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                            const struct nf_conntrack_tuple *orig)
+{
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       tuple->src.u.sctp.port = orig->dst.u.sctp.port;
+       tuple->dst.u.sctp.port = orig->src.u.sctp.port;
+       return 1;
+}
+
+/* Print out the per-protocol part of the tuple: the source and
+ * destination ports, converted to host byte order.  Returns whatever
+ * seq_printf returns. */
+static int sctp_print_tuple(struct seq_file *s,
+                           const struct nf_conntrack_tuple *tuple)
+{
+       unsigned short sport, dport;
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       sport = ntohs(tuple->src.u.sctp.port);
+       dport = ntohs(tuple->dst.u.sctp.port);
+
+       return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/* Print out the private part of the conntrack: the name of the current
+ * SCTP state.  The state is sampled under sctp_lock; the name lookup
+ * and the printing happen after the lock is released. */
+static int sctp_print_conntrack(struct seq_file *s,
+                               const struct nf_conn *conntrack)
+{
+       enum sctp_conntrack snapshot;
+       const char *label;
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       read_lock_bh(&sctp_lock);
+       snapshot = conntrack->proto.sctp.state;
+       read_unlock_bh(&sctp_lock);
+
+       label = sctp_conntrack_names[snapshot];
+       return seq_printf(s, "%s ", label);
+}
+
+/* Iterate over the chunk headers of the SCTP packet in skb.
+ *   sch     - set to each chunk header in turn (via skb_header_pointer)
+ *   _sch    - caller-provided buffer the header may be copied into
+ *   offset  - running byte offset, starts just past the SCTP header
+ *   dataoff - offset of the SCTP header within skb
+ *   count   - number of chunks visited so far
+ * The loop ends when offset reaches skb->len or a header cannot be
+ * read.  offset advances by the chunk length (wire order, hence ntohs)
+ * rounded up to a multiple of 4; a chunk whose length field is 0
+ * therefore does not advance the iterator, and callers must reject
+ * such chunks themselves. */
+#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)    \
+for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;       \
+       (offset) < (skb)->len &&                                        \
+       ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
+       (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
+
+/* Some validity checks to make sure the chunks are fine.
+ *
+ * Walks every chunk of the SCTP packet at dataoff in skb and rejects
+ * the packet when:
+ *   - a chunk has a zero length field,
+ *   - a COOKIE_ACK or COOKIE_ECHO chunk is not the first chunk,
+ *   - an INIT, INIT_ACK or SHUTDOWN_COMPLETE chunk is followed by any
+ *     other chunk,
+ *   - the packet contains no chunk at all.
+ * If map is non-NULL, the bit for each chunk type seen is set in it.
+ * conntrack is not used.
+ *
+ * Returns 0 when the packet passes, 1 when it fails. */
+static int do_basic_checks(struct nf_conn *conntrack,
+                          const struct sk_buff *skb,
+                          unsigned int dataoff,
+                          char *map)
+{
+       u_int32_t offset, count;
+       sctp_chunkhdr_t _sch, *sch;
+       int flag;
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       flag = 0;
+
+       for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+               DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);
+
+               /* A zero-length chunk never advances the iterator, so
+                  every for_each_sctp_chunk loop over this packet would
+                  spin forever.  Refuse it up front. */
+               if (sch->length == 0) {
+                       DEBUGP("Basic checks failed\n");
+                       return 1;
+               }
+
+               if (sch->type == SCTP_CID_INIT
+                       || sch->type == SCTP_CID_INIT_ACK
+                       || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+                       flag = 1;
+               }
+
+               /* Cookie Ack/Echo chunks not the first OR
+                  Init / Init Ack / Shutdown compl chunks not the only chunks */
+               if ((sch->type == SCTP_CID_COOKIE_ACK
+                       || sch->type == SCTP_CID_COOKIE_ECHO
+                       || flag)
+                    && count !=0 ) {
+                       DEBUGP("Basic checks failed\n");
+                       return 1;
+               }
+
+               if (map) {
+                       set_bit(sch->type, (void *)map);
+               }
+       }
+
+       /* A packet without any chunk would leave the callers' state
+          variable at SCTP_CONNTRACK_MAX, which sctp_packet then uses
+          as an index into sctp_timeouts.  Treat it as invalid. */
+       if (count == 0) {
+               DEBUGP("Basic checks failed\n");
+               return 1;
+       }
+
+       DEBUGP("Basic checks passed\n");
+       return 0;
+}
+
+/* Look up the state that follows cur_state when a chunk of type
+ * chunk_type is seen in direction dir.
+ *
+ * The chunk type is first mapped to its row number in sctp_conntracks;
+ * chunk types without a row leave the state unchanged.  The returned
+ * value may be SCTP_CONNTRACK_MAX, which the callers treat as an
+ * invalid transition. */
+static int new_state(enum ip_conntrack_dir dir,
+                    enum sctp_conntrack cur_state,
+                    int chunk_type)
+{
+       int i;
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       DEBUGP("Chunk type: %d\n", chunk_type);
+
+       /* i becomes the row of sctp_conntracks[dir] for this chunk type */
+       switch (chunk_type) {
+               case SCTP_CID_INIT: 
+                       DEBUGP("SCTP_CID_INIT\n");
+                       i = 0; break;
+               case SCTP_CID_INIT_ACK: 
+                       DEBUGP("SCTP_CID_INIT_ACK\n");
+                       i = 1; break;
+               case SCTP_CID_ABORT: 
+                       DEBUGP("SCTP_CID_ABORT\n");
+                       i = 2; break;
+               case SCTP_CID_SHUTDOWN: 
+                       DEBUGP("SCTP_CID_SHUTDOWN\n");
+                       i = 3; break;
+               case SCTP_CID_SHUTDOWN_ACK: 
+                       DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
+                       i = 4; break;
+               case SCTP_CID_ERROR: 
+                       DEBUGP("SCTP_CID_ERROR\n");
+                       i = 5; break;
+               case SCTP_CID_COOKIE_ECHO: 
+                       DEBUGP("SCTP_CID_COOKIE_ECHO\n");
+                       i = 6; break;
+               case SCTP_CID_COOKIE_ACK: 
+                       DEBUGP("SCTP_CID_COOKIE_ACK\n");
+                       i = 7; break;
+               case SCTP_CID_SHUTDOWN_COMPLETE: 
+                       DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
+                       i = 8; break;
+               default:
+                       /* Other chunks like DATA, SACK, HEARTBEAT and
+                       its ACK do not cause a change in state */
+                       DEBUGP("Unknown chunk type, Will stay in %s\n", 
+                                               sctp_conntrack_names[cur_state]);
+                       return cur_state;
+       }
+
+       DEBUGP("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n", 
+                       dir, sctp_conntrack_names[cur_state], chunk_type,
+                       sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
+
+       return sctp_conntracks[dir][i][cur_state];
+}
+
+/* Returns verdict for packet, or -1 for invalid.
+ *
+ * Steps, in order:
+ *   1. fetch the SCTP header and run do_basic_checks(), which also
+ *      fills map with the chunk types present;
+ *   2. general verification tag check (Sec 8.5), skipped when the
+ *      packet carries one of the chunk types that have their own rule;
+ *   3. for each chunk, under sctp_lock: apply the special verification
+ *      tag rules (Sec 8.5.1), compute the next state, record the
+ *      init_tag of INIT / INIT_ACK chunks and store the new state;
+ *   4. refresh the conntrack timer with the timeout of the final state;
+ *   5. set IPS_ASSURED when this packet moved the connection from
+ *      COOKIE_ECHOED to ESTABLISHED in the reply direction.
+ * pf and hooknum are not used. */
+static int sctp_packet(struct nf_conn *conntrack,
+                      const struct sk_buff *skb,
+                      unsigned int dataoff,
+                      enum ip_conntrack_info ctinfo,
+                      int pf,
+                      unsigned int hooknum)
+{
+       enum sctp_conntrack newconntrack, oldsctpstate;
+       sctp_sctphdr_t _sctph, *sh;
+       sctp_chunkhdr_t _sch, *sch;
+       u_int32_t offset, count;
+       /* bitmap of chunk types present in the packet, one bit per type */
+       char map[256 / sizeof (char)] = {0};
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
+       if (sh == NULL)
+               return -1;
+
+       if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
+               return -1;
+
+       /* Check the verification tag (Sec 8.5) */
+       /* NOTE(review): vtag is read here without holding sctp_lock. */
+       if (!test_bit(SCTP_CID_INIT, (void *)map)
+               && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
+               && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
+               && !test_bit(SCTP_CID_ABORT, (void *)map)
+               && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
+               && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+               DEBUGP("Verification tag check failed\n");
+               return -1;
+       }
+
+       oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
+       for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+               /* The lock is taken per chunk; every early return below
+                  must drop it first. */
+               write_lock_bh(&sctp_lock);
+
+               /* Special cases of Verification tag check (Sec 8.5.1) */
+               if (sch->type == SCTP_CID_INIT) {
+                       /* Sec 8.5.1 (A) */
+                       if (sh->vtag != 0) {
+                               write_unlock_bh(&sctp_lock);
+                               return -1;
+                       }
+               } else if (sch->type == SCTP_CID_ABORT) {
+                       /* Sec 8.5.1 (B) */
+                       if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+                               && !(sh->vtag == conntrack->proto.sctp.vtag
+                                                       [1 - CTINFO2DIR(ctinfo)])) {
+                               write_unlock_bh(&sctp_lock);
+                               return -1;
+                       }
+               } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+                       /* Sec 8.5.1 (C) */
+                       if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+                               && !(sh->vtag == conntrack->proto.sctp.vtag
+                                                       [1 - CTINFO2DIR(ctinfo)] 
+                                       && (sch->flags & 1))) {
+                               write_unlock_bh(&sctp_lock);
+                               return -1;
+                       }
+               } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+                       /* Sec 8.5.1 (D) */
+                       if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+                               write_unlock_bh(&sctp_lock);
+                               return -1;
+                       }
+               }
+
+               oldsctpstate = conntrack->proto.sctp.state;
+               newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
+
+               /* Invalid */
+               if (newconntrack == SCTP_CONNTRACK_MAX) {
+                       DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
+                              CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
+                       write_unlock_bh(&sctp_lock);
+                       return -1;
+               }
+
+               /* If it is an INIT or an INIT ACK note down the vtag */
+               if (sch->type == SCTP_CID_INIT 
+                       || sch->type == SCTP_CID_INIT_ACK) {
+                       sctp_inithdr_t _inithdr, *ih;
+
+                       ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+                                               sizeof(_inithdr), &_inithdr);
+                       if (ih == NULL) {
+                                       write_unlock_bh(&sctp_lock);
+                                       return -1;
+                       }
+                       DEBUGP("Setting vtag %x for dir %d\n", 
+                                       ih->init_tag, !CTINFO2DIR(ctinfo));
+                       /* the tag is stored for the opposite direction */
+                       conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+               }
+
+               conntrack->proto.sctp.state = newconntrack;
+               if (oldsctpstate != newconntrack)
+                       nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+               write_unlock_bh(&sctp_lock);
+       }
+
+       /* NOTE(review): if the chunk loop ran zero times, newconntrack is
+          still SCTP_CONNTRACK_MAX here and sctp_timeouts has no entry
+          for that index; the SCTP_CONNTRACK_NONE entry is NULL as well.
+          Confirm both cases are excluded before this point. */
+       nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+
+       if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
+               && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
+               && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+               DEBUGP("Setting assured bit\n");
+               set_bit(IPS_ASSURED_BIT, &conntrack->status);
+               nf_conntrack_event_cache(IPCT_STATUS, skb);
+       }
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found.
+ *
+ * Validates the first packet of a would-be connection and initialises
+ * conntrack->proto.sctp (state and reply-direction verification tag).
+ * Returns 1 when the packet is accepted as the start of a connection
+ * and 0 otherwise (the in-code comments describe 0 as "delete
+ * conntrack"). */
+static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
+                   unsigned int dataoff)
+{
+       enum sctp_conntrack newconntrack;
+       sctp_sctphdr_t _sctph, *sh;
+       sctp_chunkhdr_t _sch, *sch;
+       u_int32_t offset, count;
+       /* bitmap of chunk types present in the packet, one bit per type */
+       char map[256 / sizeof (char)] = {0};
+
+       DEBUGP(__FUNCTION__);
+       DEBUGP("\n");
+
+       sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
+       if (sh == NULL)
+               return 0;
+
+       if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
+               return 0;
+
+       /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+       if ((test_bit (SCTP_CID_ABORT, (void *)map))
+               || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
+               || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+               return 0;
+       }
+
+       newconntrack = SCTP_CONNTRACK_MAX;
+       for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+               /* Don't need lock here: this conntrack not in circulation yet */
+               /* Every chunk is evaluated from SCTP_CONNTRACK_NONE in the
+                  original direction, not from the state the previous
+                  chunk produced. */
+               newconntrack = new_state(IP_CT_DIR_ORIGINAL, 
+                                        SCTP_CONNTRACK_NONE, sch->type);
+
+               /* Invalid: delete conntrack */
+               if (newconntrack == SCTP_CONNTRACK_MAX) {
+                       DEBUGP("nf_conntrack_sctp: invalid new deleting.\n");
+                       return 0;
+               }
+
+               /* Copy the vtag into the state info */
+               if (sch->type == SCTP_CID_INIT) {
+                       if (sh->vtag == 0) {
+                               sctp_inithdr_t _inithdr, *ih;
+
+                               ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+                                                       sizeof(_inithdr), &_inithdr);
+                               if (ih == NULL)
+                                       return 0;
+
+                               DEBUGP("Setting vtag %x for new conn\n", 
+                                       ih->init_tag);
+
+                               conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = 
+                                                               ih->init_tag;
+                       } else {
+                               /* Sec 8.5.1 (A) */
+                               return 0;
+                       }
+               }
+               /* If it is a shutdown ack OOTB packet, we expect a return
+                  shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+               else {
+                       DEBUGP("Setting vtag %x for new conn OOTB\n", 
+                               sh->vtag);
+                       conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+               }
+
+               conntrack->proto.sctp.state = newconntrack;
+       }
+
+       return 1;
+}
+
+/* SCTP protocol descriptor for l3proto PF_INET.  It uses the same
+ * handlers as the PF_INET6 descriptor; there is no destroy hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { 
+       .l3proto         = PF_INET,
+       .proto           = IPPROTO_SCTP, 
+       .name            = "sctp",
+       .pkt_to_tuple    = sctp_pkt_to_tuple, 
+       .invert_tuple    = sctp_invert_tuple, 
+       .print_tuple     = sctp_print_tuple, 
+       .print_conntrack = sctp_print_conntrack,
+       .packet          = sctp_packet, 
+       .new             = sctp_new, 
+       .destroy         = NULL, 
+       .me              = THIS_MODULE 
+};
+
+/* SCTP protocol descriptor for l3proto PF_INET6.  It uses the same
+ * handlers as the PF_INET descriptor; there is no destroy hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { 
+       .l3proto         = PF_INET6,
+       .proto           = IPPROTO_SCTP, 
+       .name            = "sctp",
+       .pkt_to_tuple    = sctp_pkt_to_tuple, 
+       .invert_tuple    = sctp_invert_tuple, 
+       .print_tuple     = sctp_print_tuple, 
+       .print_conntrack = sctp_print_conntrack,
+       .packet          = sctp_packet, 
+       .new             = sctp_new, 
+       .destroy         = NULL, 
+       .me              = THIS_MODULE 
+};
+
+#ifdef CONFIG_SYSCTL
+/* One sysctl entry per SCTP conntrack state timeout.  Each entry points
+ * at the matching nf_ct_sctp_timeout_* variable and is handled by
+ * proc_dointvec_jiffies.  The list ends with a zero ctl_name entry.
+ * NOTE(review): .maxlen is sizeof(unsigned int) throughout - confirm
+ * it matches the declared type of the variables .data points at. */
+static ctl_table nf_ct_sysctl_table[] = {
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
+               .procname       = "nf_conntrack_sctp_timeout_closed",
+               .data           = &nf_ct_sctp_timeout_closed,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
+               .procname       = "nf_conntrack_sctp_timeout_cookie_wait",
+               .data           = &nf_ct_sctp_timeout_cookie_wait,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
+               .procname       = "nf_conntrack_sctp_timeout_cookie_echoed",
+               .data           = &nf_ct_sctp_timeout_cookie_echoed,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
+               .procname       = "nf_conntrack_sctp_timeout_established",
+               .data           = &nf_ct_sctp_timeout_established,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
+               .procname       = "nf_conntrack_sctp_timeout_shutdown_sent",
+               .data           = &nf_ct_sctp_timeout_shutdown_sent,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
+               .procname       = "nf_conntrack_sctp_timeout_shutdown_recd",
+               .data           = &nf_ct_sctp_timeout_shutdown_recd,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
+               .procname       = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
+               .data           = &nf_ct_sysctl_placeholder_never_used_see_below,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       { .ctl_name = 0 }
+};
+
+/* "netfilter" directory entry whose children are the SCTP timeout
+ * entries in nf_ct_sysctl_table. */
+static ctl_table nf_ct_netfilter_table[] = {
+       {
+               .ctl_name       = NET_NETFILTER,
+               .procname       = "netfilter",
+               .mode           = 0555,
+               .child          = nf_ct_sysctl_table,
+       },
+       { .ctl_name = 0 }
+};
+
+/* Top-level "net" directory entry; this is the table init() hands to
+ * register_sysctl_table().  Its child is nf_ct_netfilter_table. */
+static ctl_table nf_ct_net_table[] = {
+       {
+               .ctl_name       = CTL_NET,
+               .procname       = "net",
+               .mode           = 0555, 
+               .child          = nf_ct_netfilter_table,
+       },
+       { .ctl_name = 0 }
+};
+
+static struct ctl_table_header *nf_ct_sysctl_header;
+#endif
+
+/* Module initialisation: register the SCTP tracker for PF_INET and
+ * PF_INET6 and, with CONFIG_SYSCTL, the timeout sysctl tree.
+ *
+ * Returns 0 on success.  On failure everything registered so far is
+ * unregistered again and a non-zero error is returned: the value from
+ * nf_conntrack_protocol_register(), or -ENOMEM when the sysctl table
+ * cannot be registered. */
+int __init init(void)
+{
+       int ret;
+
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4);
+       if (ret) {
+               printk("nf_conntrack_proto_sctp4: protocol register failed\n");
+               goto out;
+       }
+       ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6);
+       if (ret) {
+               printk("nf_conntrack_proto_sctp6: protocol register failed\n");
+               goto cleanup_sctp4;
+       }
+
+#ifdef CONFIG_SYSCTL
+       nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+       if (nf_ct_sysctl_header == NULL) {
+               printk("nf_conntrack_proto_sctp: can't register to sysctl.\n");
+               /* ret is still 0 from the successful registrations above;
+                  without an explicit error the module would report
+                  success after having unregistered both protocols. */
+               ret = -ENOMEM;
+               goto cleanup;
+       }
+#endif
+
+       return ret;
+
+#ifdef CONFIG_SYSCTL
+ cleanup:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
+#endif
+ cleanup_sctp4:
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
+ out:
+       DEBUGP("SCTP conntrack module loading %s\n", 
+                                       ret ? "failed": "succeeded");
+       return ret;
+}
+
+/* Module exit: unregister both SCTP protocol descriptors (PF_INET6
+ * first, then PF_INET) and, with CONFIG_SYSCTL, the sysctl table that
+ * init() registered. */
+void __exit fini(void)
+{
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
+       nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
+#ifdef CONFIG_SYSCTL
+       unregister_sysctl_table(nf_ct_sysctl_header);
+#endif
+       DEBUGP("SCTP conntrack module unloaded\n");
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
new file mode 100644 (file)
index 0000000..83d90dd
--- /dev/null
@@ -0,0 +1,1162 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
+ *     - Real stateful connection tracking
+ *     - Modified state transitions table
+ *     - Window scaling support added
+ *     - SACK support added
+ *
+ * Willy Tarreau:
+ *     - State table bugfixes
+ *     - More robust state changes
+ *     - Tuning timer parameters
+ *
+ * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalized Layer 3 protocol part.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+ *
+ * version 2.2
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+
+#if 0
+#define DEBUGP printk
+#define DEBUGP_VARS
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.tcp */
+static DEFINE_RWLOCK(tcp_lock);
+
+/* "Be conservative in what you do, 
+    be liberal in what you accept from others." 
+    If it's non-zero, we mark only out of window RST segments as INVALID. */
+int nf_ct_tcp_be_liberal = 0;
+
+/* When connection is picked up from the middle, how many packets are required
+   to pass in each direction when we assume we are in sync - if any side uses
+   window scaling, we lost the game. 
+   If it is set to zero, we disable picking up already established 
+   connections. */
+int nf_ct_tcp_loose = 3;
+
+/* Max number of the retransmitted packets without receiving an (acceptable) 
+   ACK from the destination. If this number is reached, a shorter timer 
+   will be started. */
+int nf_ct_tcp_max_retrans = 3;
+
+  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+     closely.  They're more complex. --RR */
+
+/* Printable names of the TCP conntrack states.  tcp_print_conntrack()
+   indexes this array with conntrack->proto.tcp.state, so the order
+   presumably has to follow enum tcp_conntrack (declared elsewhere). */
+static const char *tcp_conntrack_names[] = {
+       "NONE",
+       "SYN_SENT",
+       "SYN_RECV",
+       "ESTABLISHED",
+       "FIN_WAIT",
+       "CLOSE_WAIT",
+       "LAST_ACK",
+       "TIME_WAIT",
+       "CLOSE",
+       "LISTEN"
+};
+  
+/* Postfix unit macros: they are written after a number, so that
+   "2 MINS" expands to "2 * 60 * HZ".  All timeouts below are therefore
+   expressed in multiples of HZ. */
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+/* Per-state timeouts; tcp_timeouts[] below holds pointers to them. */
+unsigned long nf_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned long nf_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned long nf_ct_tcp_timeout_established =   5 DAYS;
+unsigned long nf_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned long nf_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned long nf_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned long nf_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned long nf_ct_tcp_timeout_close =        10 SECS;
+
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+   Linux uses 15 packets as limit, which corresponds 
+   to ~13-30min depending on RTO. */
+unsigned long nf_ct_tcp_timeout_max_retrans =     5 MINS;
+/* Pointers to the timeout variables above, one slot per conntrack state
+   as named in the per-entry comments.  The NONE and LISTEN slots are
+   NULL, so a lookup for those states must not be dereferenced. */
+static unsigned long * tcp_timeouts[]
+= { NULL,                              /* TCP_CONNTRACK_NONE */
+    &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
+    &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
+    &nf_ct_tcp_timeout_established,    /* TCP_CONNTRACK_ESTABLISHED, */
+    &nf_ct_tcp_timeout_fin_wait,       /* TCP_CONNTRACK_FIN_WAIT, */
+    &nf_ct_tcp_timeout_close_wait,     /* TCP_CONNTRACK_CLOSE_WAIT, */
+    &nf_ct_tcp_timeout_last_ack,       /* TCP_CONNTRACK_LAST_ACK, */
+    &nf_ct_tcp_timeout_time_wait,      /* TCP_CONNTRACK_TIME_WAIT, */
+    &nf_ct_tcp_timeout_close,          /* TCP_CONNTRACK_CLOSE, */
+    NULL,                              /* TCP_CONNTRACK_LISTEN */
+ };
+#define sNO TCP_CONNTRACK_NONE
+#define sSS TCP_CONNTRACK_SYN_SENT
+#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
+#define sFW TCP_CONNTRACK_FIN_WAIT
+#define sCW TCP_CONNTRACK_CLOSE_WAIT
+#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
+#define sLI TCP_CONNTRACK_LISTEN
+#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
+
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+enum tcp_bit_set {
+       TCP_SYN_SET,
+       TCP_SYNACK_SET,
+       TCP_FIN_SET,
+       TCP_ACK_SET,
+       TCP_RST_SET,
+       TCP_NONE_SET,
+};
+  
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments 
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meanings of the states are:
+ *
+ * NONE:       initial state
+ * SYN_SENT:   SYN-only packet seen 
+ * SYN_RECV:   SYN-ACK packet seen
+ * ESTABLISHED:        ACK packet seen
+ * FIN_WAIT:   FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN) 
+ * LAST_ACK:   FIN seen (after FIN)
+ * TIME_WAIT:  last ACK seen
+ * CLOSE:      closed connection
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ *     if they may be either invalid or valid 
+ *     and the receiver may send back a connection 
+ *     closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ *     if they are invalid
+ *     or we do not support the request (simultaneous open)
+ */
+/* State transition table.  The first dimension has one sub-table for the
+   ORIGINAL and one for the REPLY direction, the second has one row per
+   enum tcp_bit_set value (syn, synack, fin, ack, rst, none) and the third
+   one column per current state.  Each entry is the resulting state, or
+   sIV / sIG for invalid / ignored packets. */
+static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+       {
+/* ORIGINAL */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*syn*/           { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ *     sNO -> sSS      Initialize a new connection
+ *     sSS -> sSS      Retransmitted SYN
+ *     sSR -> sIG      Late retransmitted SYN?
+ *     sES -> sIG      Error: SYNs in window outside the SYN_SENT state
+ *                     are errors. Receiver will reply with RST 
+ *                     and close the connection.
+ *                     Or we are not in sync and hold a dead connection.
+ *     sFW -> sIG
+ *     sCW -> sIG
+ *     sLA -> sIG
+ *     sTW -> sSS      Reopened connection (RFC 1122).
+ *     sCL -> sSS
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ *     - either it tries to set up a simultaneous open, which is 
+ *       not supported;
+ *     - or the firewall has just been inserted between the two hosts
+ *       during the session set-up. The SYN will be retransmitted 
+ *       by the true client (or it'll time out).
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *     sNO -> sIV      Too late and no reason to do anything...
+ *     sSS -> sIV      Client might not send FIN in this state:
+ *                     we enforce waiting for a SYN/ACK reply first.
+ *     sSR -> sFW      Close started.
+ *     sES -> sFW
+ *     sFW -> sLA      FIN seen in both directions, waiting for
+ *                     the last ACK. 
+ *                     Might be a retransmitted FIN as well...
+ *     sCW -> sLA
+ *     sLA -> sLA      Retransmitted FIN. Remain in the same state.
+ *     sTW -> sTW
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*ack*/           { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *     sNO -> sES      Assumed.
+ *     sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
+ *     sSR -> sES      Established state is reached.
+ *     sES -> sES      :-)
+ *     sFW -> sCW      Normal close request answered by ACK.
+ *     sCW -> sCW
+ *     sLA -> sTW      Last ACK detected.
+ *     sTW -> sTW      Retransmitted last ACK. Remain in the same state.
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+       },
+       {
+/* REPLY */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*syn*/           { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ *     sNO -> sIV      Never reached.
+ *     sSS -> sIV      Simultaneous open, not supported
+ *     sSR -> sIV      Simultaneous open, not supported.
+ *     sES -> sIV      Server may not initiate a connection.
+ *     sFW -> sIV
+ *     sCW -> sIV
+ *     sLA -> sIV
+ *     sTW -> sIV      Reopened connection, but server may not do it.
+ *     sCL -> sIV
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ *     sSS -> sSR      Standard open.
+ *     sSR -> sSR      Retransmitted SYN/ACK.
+ *     sES -> sIG      Late retransmitted SYN/ACK?
+ *     sFW -> sIG      Might be SYN/ACK answering ignored SYN
+ *     sCW -> sIG
+ *     sLA -> sIG
+ *     sTW -> sIG
+ *     sCL -> sIG
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *     sSS -> sIV      Server might not send FIN in this state.
+ *     sSR -> sFW      Close started.
+ *     sES -> sFW
+ *     sFW -> sLA      FIN seen in both directions.
+ *     sCW -> sLA
+ *     sLA -> sLA      Retransmitted FIN.
+ *     sTW -> sTW
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*ack*/           { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *     sSS -> sIV      Might be a half-open connection.
+ *     sSR -> sSR      Might answer late resent SYN.
+ *     sES -> sES      :-)
+ *     sFW -> sCW      Normal close request answered by ACK.
+ *     sCW -> sCW
+ *     sLA -> sTW      Last ACK detected.
+ *     sTW -> sTW      Retransmitted last ACK.
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+       }
+};
+
+/* Copy the TCP source and destination ports of the packet into the tuple.
+   Returns 1 on success and 0 if the header could not be obtained. */
+static int tcp_pkt_to_tuple(const struct sk_buff *skb,
+                           unsigned int dataoff,
+                           struct nf_conntrack_tuple *tuple)
+{
+       struct tcphdr hdr_copy;
+       struct tcphdr *th;
+
+       /* Only the first 8 bytes of the header are requested. */
+       th = skb_header_pointer(skb, dataoff, 8, &hdr_copy);
+       if (!th)
+               return 0;
+
+       tuple->dst.u.tcp.port = th->dest;
+       tuple->src.u.tcp.port = th->source;
+
+       return 1;
+}
+
+/* Build the reverse-direction tuple: the source port of @tuple becomes the
+   destination port of @orig and vice versa.  Always returns 1. */
+static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                           const struct nf_conntrack_tuple *orig)
+{
+       tuple->src.u.tcp.port = orig->dst.u.tcp.port;
+       tuple->dst.u.tcp.port = orig->src.u.tcp.port;
+       return 1;
+}
+
+/* Write the TCP part of a tuple (both ports, in host byte order) to the
+   seq_file and hand back seq_printf()'s return value. */
+static int tcp_print_tuple(struct seq_file *s,
+                          const struct nf_conntrack_tuple *tuple)
+{
+       unsigned short sport = ntohs(tuple->src.u.tcp.port);
+       unsigned short dport = ntohs(tuple->dst.u.tcp.port);
+
+       return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/* Write the name of the connection's current TCP state to the seq_file.
+   The state is sampled under tcp_lock; the name lookup and the printing
+   happen after the lock has been dropped. */
+static int tcp_print_conntrack(struct seq_file *s,
+                              const struct nf_conn *conntrack)
+{
+       enum tcp_conntrack cur;
+       const char *name;
+
+       read_lock_bh(&tcp_lock);
+       cur = conntrack->proto.tcp.state;
+       read_unlock_bh(&tcp_lock);
+
+       name = tcp_conntrack_names[cur];
+       return seq_printf(s, "%s ", name);
+}
+
+/* Classify a segment by its flags into an enum tcp_bit_set value.
+   Precedence: RST, then SYN (with or without ACK), then FIN, then ACK. */
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+       if (tcph->rst)
+               return TCP_RST_SET;
+
+       if (tcph->syn) {
+               if (tcph->ack)
+                       return TCP_SYNACK_SET;
+               return TCP_SYN_SET;
+       }
+
+       if (tcph->fin)
+               return TCP_FIN_SET;
+
+       if (tcph->ack)
+               return TCP_ACK_SET;
+
+       return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+   in IP Filter' by Guido van Rooij.
+   
+   http://www.nluug.nl/events/sane2000/papers.html
+   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+   
+   The boundaries and the conditions are changed according to RFC793:
+   the packet must intersect the window (i.e. segments may be
+   after the right or before the left edge) and thus receivers may ACK
+   segments after the right edge of the window.
+
+       td_maxend = max(sack + max(win,1)) seen in reply packets
+       td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+       td_maxwin += seq + len - sender.td_maxend
+                       if seq + len > sender.td_maxend
+       td_end    = max(seq + len) seen in sent packets
+   
+   I.   Upper bound for valid data:    seq <= sender.td_maxend
+   II.  Lower bound for valid data:    seq + len >= sender.td_end - receiver.td_maxwin
+   III.        Upper bound for valid ack:      sack <= receiver.td_end
+   IV. Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
+
+   where sack is the highest right edge of sack block found in the packet.
+
+   The upper bound limit for a valid ack is not ignored - 
+   we don't have to deal with fragments. 
+*/
+
+/* Sequence number just past the end of the segment: seq plus the payload
+   length (len minus dataoff minus the TCP header length), plus one for
+   each of SYN and FIN if set.  The result wraps modulo 2^32. */
+static inline __u32 segment_seq_plus_len(__u32 seq,
+                                        size_t len,
+                                        unsigned int dataoff,
+                                        struct tcphdr *tcph)
+{
+       /* XXX Should I use payload length field in IP/IPv6 header ?
+        * - YK */
+       __u32 seg_end = seq + len - dataoff - tcph->doff*4;
+
+       if (tcph->syn)
+               seg_end++;
+       if (tcph->fin)
+               seg_end++;
+
+       return seg_end;
+}
+  
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST                 66000
+#define MAXACKWINDOW(sender)                                           \
+       ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
+                                             : MAXACKWINCONST)
+  
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ *
+ * Walks the TCP options that follow the fixed header at @dataoff and
+ * records in @state whether SACK-permitted and window scaling were
+ * offered (flags) and the offered scale factor (td_scale, capped at 14).
+ * If the header carries no options at all, @state is left untouched.
+ * Parsing stops at the first malformed or truncated option.
+ */
+static void tcp_options(const struct sk_buff *skb,
+                       unsigned int dataoff,
+                       struct tcphdr *tcph, 
+                       struct ip_ct_tcp_state *state)
+{
+       unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+       unsigned char *ptr;
+       int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+       if (!length)
+               return;
+
+       ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+                                length, buff);
+       BUG_ON(ptr == NULL);
+
+       state->td_scale = 
+       state->flags = 0;
+
+       while (length > 0) {
+               int opcode=*ptr++;
+               int opsize;
+
+               switch (opcode) {
+               case TCPOPT_EOL:
+                       return;
+               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                       length--;
+                       continue;
+               default:
+                       if (length < 2) /* no room for the length byte */
+                               return;
+                       opsize=*ptr++;
+                       if (opsize < 2) /* "silly options" */
+                               return;
+                       /* A "break" here would only leave the switch and
+                        * the loop would go on with ptr advanced but
+                        * length unchanged, running past the options. */
+                       if (opsize > length)
+                               return; /* don't parse partial options */
+
+                       if (opcode == TCPOPT_SACK_PERM 
+                           && opsize == TCPOLEN_SACK_PERM)
+                               state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+                       else if (opcode == TCPOPT_WINDOW
+                                && opsize == TCPOLEN_WINDOW) {
+                               state->td_scale = *(u_int8_t *)ptr;
+
+                               if (state->td_scale > 14) {
+                                       /* See RFC1323 */
+                                       state->td_scale = 14;
+                               }
+                               state->flags |=
+                                       IP_CT_TCP_FLAG_WINDOW_SCALE;
+                       }
+                       ptr += opsize - 2;
+                       length -= opsize;
+               }
+       }
+}
+
+/*
+ * Find the highest right edge among the SACK blocks of a segment.
+ *
+ * Walks the TCP options that follow the fixed header at @dataoff.  For
+ * the first well-formed SACK option found, *sack is raised to the largest
+ * right edge that is after() its current value.  *sack is left untouched
+ * when the segment has no options, only the aligned timestamp option, or
+ * no usable SACK option.  Parsing stops at the first malformed or
+ * truncated option.
+ */
+static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
+                    struct tcphdr *tcph, __u32 *sack)
+{
+       unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+       unsigned char *ptr;
+       int length = (tcph->doff*4) - sizeof(struct tcphdr);
+       __u32 tmp;
+
+       if (!length)
+               return;
+
+       ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+                                length, buff);
+       BUG_ON(ptr == NULL);
+
+       /* Fast path for timestamp-only option.
+        * length is in bytes and so is TCPOLEN_TSTAMP_ALIGNED; comparing
+        * against TCPOLEN_TSTAMP_ALIGNED*4 could never match because the
+        * option area cannot be that long. */
+       if (length == TCPOLEN_TSTAMP_ALIGNED
+           && *(__u32 *)ptr ==
+               __constant_ntohl((TCPOPT_NOP << 24) 
+                                | (TCPOPT_NOP << 16)
+                                | (TCPOPT_TIMESTAMP << 8)
+                                | TCPOLEN_TIMESTAMP))
+               return;
+
+       while (length > 0) {
+               int opcode = *ptr++;
+               int opsize, i;
+
+               switch (opcode) {
+               case TCPOPT_EOL:
+                       return;
+               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                       length--;
+                       continue;
+               default:
+                       if (length < 2) /* no room for the length byte */
+                               return;
+                       opsize = *ptr++;
+                       if (opsize < 2) /* "silly options" */
+                               return;
+                       /* A "break" here would only leave the switch and
+                        * the loop would go on with ptr advanced but
+                        * length unchanged, running past the options. */
+                       if (opsize > length)
+                               return; /* don't parse partial options */
+
+                       if (opcode == TCPOPT_SACK 
+                           && opsize >= (TCPOLEN_SACK_BASE 
+                                         + TCPOLEN_SACK_PERBLOCK)
+                           && !((opsize - TCPOLEN_SACK_BASE) 
+                                % TCPOLEN_SACK_PERBLOCK)) {
+                               for (i = 0;
+                                    i < (opsize - TCPOLEN_SACK_BASE);
+                                    i += TCPOLEN_SACK_PERBLOCK) {
+                                       /* second 32-bit word of the block
+                                        * is its right edge */
+                                       memcpy(&tmp, (__u32 *)(ptr + i) + 1,
+                                              sizeof(__u32));
+                                       tmp = ntohl(tmp);
+
+                                       if (after(tmp, *sack))
+                                               *sack = tmp;
+                               }
+                               return;
+                       }
+                       ptr += opsize - 2;
+                       length -= opsize;
+               }
+       }
+}
+
+/*
+ * Check a TCP segment against the tracked windows of its connection and,
+ * if it is acceptable, update the tracking data.
+ *
+ * state:   per-connection TCP tracking data; seen[dir] is treated as the
+ *          sender of this segment and seen[!dir] as the receiver
+ * dir:     direction of the packet
+ * index:   flag classification of the segment; only compared against
+ *          TCP_ACK_SET here (retransmission counting)
+ * skb, dataoff: the packet and the offset handed on to
+ *          segment_seq_plus_len(), tcp_sack() and tcp_options()
+ * tcph:    TCP header of the segment
+ * pf:      protocol family, passed to nf_log_packet()
+ *
+ * Returns 1 if the segment passes the four bounds checks I-IV (or one of
+ * the two sides is still "loose").  Otherwise the reason is logged when
+ * LOG_INVALID(IPPROTO_TCP) is set and nf_ct_tcp_be_liberal is returned.
+ *
+ * NOTE(review): the DEBUGP() calls below reference `iph`, which is not
+ * declared in this function.  This only compiles because DEBUGP() expands
+ * to nothing unless the "#if 0" at the top of the file is flipped.
+ */
+static int tcp_in_window(struct ip_ct_tcp *state, 
+                         enum ip_conntrack_dir dir,
+                         unsigned int index,
+                         const struct sk_buff *skb,
+                        unsigned int dataoff,
+                         struct tcphdr *tcph,
+                        int pf)
+{
+       struct ip_ct_tcp_state *sender = &state->seen[dir];
+       struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+       __u32 seq, ack, sack, end, win, swin;
+       int res;
+
+       /*
+        * Get the required data from the packet.
+        */
+       seq = ntohl(tcph->seq);
+       ack = sack = ntohl(tcph->ack_seq);
+       win = ntohs(tcph->window);
+       end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
+
+       /* sack starts out equal to ack; tcp_sack() may only raise it. */
+       if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+               tcp_sack(skb, dataoff, tcph, &sack);
+
+       DEBUGP("tcp_in_window: START\n");
+       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+              "seq=%u ack=%u sack=%u win=%u end=%u\n",
+               NIPQUAD(iph->saddr), ntohs(tcph->source), 
+               NIPQUAD(iph->daddr), ntohs(tcph->dest),
+               seq, ack, sack, win, end);
+       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale, 
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 
+               receiver->td_scale);
+
+       if (sender->td_end == 0) {
+               /*
+                * Initialize sender data.
+                */
+               if (tcph->syn && tcph->ack) {
+                       /*
+                        * Outgoing SYN-ACK in reply to a SYN.
+                        */
+                       sender->td_end = 
+                       sender->td_maxend = end;
+                       sender->td_maxwin = (win == 0 ? 1 : win);
+
+                       tcp_options(skb, dataoff, tcph, sender);
+                       /* 
+                        * RFC 1323:
+                        * Both sides must send the Window Scale option
+                        * to enable window scaling in either direction.
+                        */
+                       if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
+                             && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
+                               sender->td_scale = 
+                               receiver->td_scale = 0;
+               } else {
+                       /*
+                        * We are in the middle of a connection,
+                        * its history is lost for us.
+                        * Let's try to use the data from the packet.
+                        */
+                       sender->td_end = end;
+                       sender->td_maxwin = (win == 0 ? 1 : win);
+                       sender->td_maxend = end + sender->td_maxwin;
+               }
+       } else if (((state->state == TCP_CONNTRACK_SYN_SENT
+                    && dir == IP_CT_DIR_ORIGINAL)
+                  || (state->state == TCP_CONNTRACK_SYN_RECV
+                    && dir == IP_CT_DIR_REPLY))
+                  && after(end, sender->td_end)) {
+               /*
+                * RFC 793: "if a TCP is reinitialized ... then it need
+                * not wait at all; it must only be sure to use sequence 
+                * numbers larger than those recently used."
+                */
+               sender->td_end =
+               sender->td_maxend = end;
+               sender->td_maxwin = (win == 0 ? 1 : win);
+
+               tcp_options(skb, dataoff, tcph, sender);
+       }
+
+       if (!(tcph->ack)) {
+               /*
+                * If there is no ACK, just pretend it was set and OK.
+                */
+               ack = sack = receiver->td_end;
+       } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == 
+                   (TCP_FLAG_ACK|TCP_FLAG_RST)) 
+                  && (ack == 0)) {
+               /*
+                * Broken TCP stacks, that set ACK in RST packets as well
+                * with zero ack value.
+                */
+               ack = sack = receiver->td_end;
+       }
+
+       if (seq == end
+           && (!tcph->rst
+               || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+               /*
+                * Packets contains no data: we assume it is valid
+                * and check the ack value only.
+                * However RST segments are always validated by their
+                * SEQ number, except when seq == 0 (reset sent answering
+                * SYN.
+                */
+               seq = end = sender->td_end;
+
+       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+              "seq=%u ack=%u sack =%u win=%u end=%u\n",
+               NIPQUAD(iph->saddr), ntohs(tcph->source),
+               NIPQUAD(iph->daddr), ntohs(tcph->dest),
+               seq, ack, sack, win, end);
+       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale, 
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
+
+       DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+               before(seq, sender->td_maxend + 1),
+               after(end, sender->td_end - receiver->td_maxwin - 1),
+               before(sack, receiver->td_end + 1),
+               after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+
+       /* Accept if window tracking is still disabled for either side, or
+          if all four bounds (I-IV, in that order) hold. */
+       if (sender->loose || receiver->loose ||
+           (before(seq, sender->td_maxend + 1) &&
+            after(end, sender->td_end - receiver->td_maxwin - 1) &&
+            before(sack, receiver->td_end + 1) &&
+            after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+               /*
+                * Take into account window scaling (RFC 1323).
+                */
+               if (!tcph->syn)
+                       win <<= sender->td_scale;
+
+               /*
+                * Update sender data.
+                */
+               swin = win + (sack - ack);
+               if (sender->td_maxwin < swin)
+                       sender->td_maxwin = swin;
+               if (after(end, sender->td_end))
+                       sender->td_end = end;
+               /*
+                * Update receiver data.
+                */
+               if (after(end, sender->td_maxend))
+                       receiver->td_maxwin += end - sender->td_maxend;
+               if (after(sack + win, receiver->td_maxend - 1)) {
+                       receiver->td_maxend = sack + win;
+                       if (win == 0)
+                               receiver->td_maxend++;
+               }
+
+               /* 
+                * Check retransmissions.
+                */
+               if (index == TCP_ACK_SET) {
+                       if (state->last_dir == dir
+                           && state->last_seq == seq
+                           && state->last_ack == ack
+                           && state->last_end == end)
+                               state->retrans++;
+                       else {
+                               state->last_dir = dir;
+                               state->last_seq = seq;
+                               state->last_ack = ack;
+                               state->last_end = end;
+                               state->retrans = 0;
+                       }
+               }
+               /*
+                * Close the window of disabled window tracking :-)
+                */
+               if (sender->loose)
+                       sender->loose--;
+
+               res = 1;
+       } else {
+               /* The nested conditional picks the message for the first
+                  of the four bounds that failed; "BUG" would mean that
+                  none failed although we are on the reject path. */
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                       "nf_ct_tcp: %s ",
+                       before(seq, sender->td_maxend + 1) ?
+                       after(end, sender->td_end - receiver->td_maxwin - 1) ?
+                       before(sack, receiver->td_end + 1) ?
+                       after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+                       : "ACK is under the lower bound (possible overly delayed ACK)"
+                       : "ACK is over the upper bound (ACKed data not seen yet)"
+                       : "SEQ is under the lower bound (already ACKed data retransmitted)"
+                       : "SEQ is over the upper bound (over the window of the receiver)");
+
+               res = nf_ct_tcp_be_liberal;
+       }
+  
+       DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+              "receiver end=%u maxend=%u maxwin=%u\n",
+               res, sender->td_end, sender->td_maxend, sender->td_maxwin, 
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+       return res;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/* Update sender->td_end after NAT successfully mangled the packet */
+/* Caller must linearize skb at tcp header. */
+void nf_conntrack_tcp_update(struct sk_buff *skb,
+                            unsigned int dataoff,
+                            struct nf_conn *conntrack, 
+                            int dir)
+{
+       struct tcphdr *tcph = (void *)skb->data + dataoff;
+       __u32 end;
+#ifdef DEBUGP_VARS
+       struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
+       struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
+#endif
+
+       end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
+
+       write_lock_bh(&tcp_lock);
+       /*
+        * We have to worry for the ack in the reply packet only...
+        */
+       if (after(end, conntrack->proto.tcp.seen[dir].td_end))
+               conntrack->proto.tcp.seen[dir].td_end = end;
+       conntrack->proto.tcp.last_end = end;
+       write_unlock_bh(&tcp_lock);
+       DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale, 
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
+}
+#endif
+
+#define        TH_FIN  0x01
+#define        TH_SYN  0x02
+#define        TH_RST  0x04
+#define        TH_PUSH 0x08
+#define        TH_ACK  0x10
+#define        TH_URG  0x20
+#define        TH_ECE  0x40
+#define        TH_CWR  0x80
+
+/* table of valid flag combinations - ECE and CWR are always valid.
+   tcp_error() indexes it with the TCP flags byte after masking off ECE
+   and CWR.  Combinations not listed here are zero-initialized and are
+   therefore treated as invalid. */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+       [TH_SYN]                        = 1,
+       [TH_SYN|TH_ACK]                 = 1,
+       [TH_SYN|TH_ACK|TH_PUSH]         = 1,
+       [TH_RST]                        = 1,
+       [TH_RST|TH_ACK]                 = 1,
+       [TH_RST|TH_ACK|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK]                 = 1,
+       [TH_ACK]                        = 1,
+       [TH_ACK|TH_PUSH]                = 1,
+       [TH_ACK|TH_URG]                 = 1,
+       [TH_ACK|TH_URG|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK|TH_URG]          = 1,
+       [TH_FIN|TH_ACK|TH_URG|TH_PUSH]  = 1,
+};
+
+/* Protect conntrack against broken packets. Code taken from ipt_unclean.c.
+   Checks, in this order: that a full TCP header is present, that the data
+   offset is sane, the checksum (PRE_ROUTING hooks only, via @csum) and the
+   flag combination.  Returns NF_ACCEPT if all checks pass; otherwise the
+   reason is logged when LOG_INVALID(IPPROTO_TCP) is set and -NF_ACCEPT is
+   returned.  @ctinfo is not used here. */
+static int tcp_error(struct sk_buff *skb,
+                    unsigned int dataoff,
+                    enum ip_conntrack_info *ctinfo,
+                    int pf,
+                    unsigned int hooknum,
+                    int(*csum)(const struct sk_buff *,unsigned int))
+{
+       struct tcphdr hdr_copy;
+       struct tcphdr *th;
+       unsigned int tcplen = skb->len - dataoff;
+       unsigned int hdrlen;
+       u_int8_t flags;
+       int on_prerouting;
+
+       /* Smaller than the minimal TCP header? */
+       th = skb_header_pointer(skb, dataoff, sizeof(hdr_copy), &hdr_copy);
+       if (!th) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                               "nf_ct_tcp: short packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Header length below the fixed header size, or beyond the
+          data that is actually there: malformed or truncated. */
+       hdrlen = th->doff*4;
+       if (hdrlen < sizeof(struct tcphdr) || hdrlen > tcplen) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                               "nf_ct_tcp: truncated/malformed packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Checksum invalid? Ignore.
+        * Packets on the outgoing path are not checked because the
+        * semantic of CHECKSUM_HW is different there and moreover root
+        * might send raw packets.
+        */
+       /* FIXME: Source route IP option packets --RR */
+       on_prerouting = (pf == PF_INET && hooknum == NF_IP_PRE_ROUTING)
+                       || (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING);
+       if (on_prerouting
+           && skb->ip_summed != CHECKSUM_UNNECESSARY
+           && csum(skb, dataoff)) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                 "nf_ct_tcp: bad TCP checksum ");
+               return -NF_ACCEPT;
+       }
+
+       /* Byte 13 of the header holds the flags; ECE and CWR are masked
+          off before the table lookup. */
+       flags = ((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR);
+       if (tcp_valid_flags[flags] == 0) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                 "nf_ct_tcp: invalid TCP flag combination ");
+               return -NF_ACCEPT;
+       }
+
+       return NF_ACCEPT;
+}
+
+/* TCP checksum helper for IPv4: feeds the IPv4 source/destination addresses,
+ * the segment length and either the hardware-provided sum (CHECKSUM_HW) or a
+ * software sum of the segment into csum_tcpudp_magic().  tcp_error() treats
+ * a nonzero result as a bad checksum. */
+static int csum4(const struct sk_buff *skb, unsigned int dataoff)
+{
+       unsigned int seglen = skb->len - dataoff;
+
+       return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+                                seglen, IPPROTO_TCP,
+                                skb->ip_summed == CHECKSUM_HW
+                                ? skb->csum
+                                : skb_checksum(skb, dataoff, seglen, 0));
+}
+
+/* TCP checksum helper for IPv6: same as csum4() but uses the IPv6 header
+ * addresses and csum_ipv6_magic().  tcp_error() treats a nonzero result as
+ * a bad checksum. */
+static int csum6(const struct sk_buff *skb, unsigned int dataoff)
+{
+       unsigned int seglen = skb->len - dataoff;
+
+       return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+                              seglen, IPPROTO_TCP,
+                              skb->ip_summed == CHECKSUM_HW
+                              ? skb->csum
+                              : skb_checksum(skb, dataoff, seglen, 0));
+}
+
+/* .error hook of the IPv4 TCP protocol descriptor: runs the common
+ * tcp_error() checks with the IPv4 checksum helper csum4(). */
+static int tcp_error4(struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info *ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4);
+}
+
+/* .error hook of the IPv6 TCP protocol descriptor: runs the common
+ * tcp_error() checks with the IPv6 checksum helper csum6(). */
+static int tcp_error6(struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info *ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6);
+}
+
+/*
+ * Track one TCP segment that belongs to an existing conntrack entry.
+ *
+ * The next state is looked up in tcp_conntracks[] from the packet
+ * direction, the flag index of the segment and the current state.  Special
+ * transitions (ignored packets, invalid packets, reopened connections,
+ * RST answering an ignored SYN) are handled first, then the window check
+ * is run and the state, events and timeout of the conntrack are updated.
+ *
+ * Returns NF_ACCEPT when the packet was handled, -NF_ACCEPT for an invalid
+ * packet, -NF_DROP when an out of sync session is killed and -NF_REPEAT
+ * when a closed connection is reopened and has to be looked up again.
+ */
+static int tcp_packet(struct nf_conn *conntrack,
+                     const struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       enum tcp_conntrack new_state, old_state;
+       enum ip_conntrack_dir dir;
+       struct tcphdr *th, _tcph;
+       unsigned long timeout;
+       unsigned int index;
+
+       th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
+       BUG_ON(th == NULL);
+
+       write_lock_bh(&tcp_lock);
+       old_state = conntrack->proto.tcp.state;
+       dir = CTINFO2DIR(ctinfo);
+       index = get_conntrack_index(th);
+       new_state = tcp_conntracks[dir][index][old_state];
+
+       switch (new_state) {
+       case TCP_CONNTRACK_IGNORE:
+               /* Either SYN in ORIGINAL
+                * or SYN/ACK in REPLY. */
+               if (index == TCP_SYNACK_SET
+                   && conntrack->proto.tcp.last_index == TCP_SYN_SET
+                   && conntrack->proto.tcp.last_dir != dir
+                   && ntohl(th->ack_seq) ==
+                            conntrack->proto.tcp.last_end) {
+                       /* This SYN/ACK acknowledges a SYN that we earlier
+                        * ignored as invalid. This means that the client and
+                        * the server are both in sync, while the firewall is
+                        * not. We kill this session and block the SYN/ACK so
+                        * that the client cannot but retransmit its SYN and
+                        * thus initiate a clean new session.
+                        */
+                       write_unlock_bh(&tcp_lock);
+                       if (LOG_INVALID(IPPROTO_TCP))
+                               nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                         "nf_ct_tcp: killing out of sync session ");
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return -NF_DROP;
+               }
+               /* Remember the ignored packet so that a later SYN/ACK or
+                * RST answering it can be recognised. */
+               conntrack->proto.tcp.last_index = index;
+               conntrack->proto.tcp.last_dir = dir;
+               conntrack->proto.tcp.last_seq = ntohl(th->seq);
+               conntrack->proto.tcp.last_end =
+                   segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
+
+               write_unlock_bh(&tcp_lock);
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                 "nf_ct_tcp: invalid packet ignored ");
+               return NF_ACCEPT;
+       case TCP_CONNTRACK_MAX:
+               /* Invalid packet */
+               DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+                      dir, get_conntrack_index(th),
+                      old_state);
+               write_unlock_bh(&tcp_lock);
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                 "nf_ct_tcp: invalid state ");
+               return -NF_ACCEPT;
+       case TCP_CONNTRACK_SYN_SENT:
+               if (old_state < TCP_CONNTRACK_TIME_WAIT)
+                       break;
+               if ((conntrack->proto.tcp.seen[dir].flags &
+                       IP_CT_TCP_FLAG_CLOSE_INIT)
+                   || after(ntohl(th->seq),
+                            conntrack->proto.tcp.seen[dir].td_end)) {
+                       /* Attempt to reopen a closed connection.
+                       * Delete this connection and look up again. */
+                       write_unlock_bh(&tcp_lock);
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return -NF_REPEAT;
+               }
+               /* Fall through */
+       case TCP_CONNTRACK_CLOSE:
+               if (index == TCP_RST_SET
+                   && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+                   && conntrack->proto.tcp.last_index == TCP_SYN_SET
+                   && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+                       /* RST sent to invalid SYN we had let through
+                        * SYN was in window then, tear down connection.
+                        * We skip window checking, because packet might ACK
+                        * segments we ignored in the SYN. */
+                       goto in_window;
+               }
+               /* Fall through */
+       default:
+               /* Keep compilers happy. */
+               break;
+       }
+
+       if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
+                          skb, dataoff, th, pf)) {
+               write_unlock_bh(&tcp_lock);
+               return -NF_ACCEPT;
+       }
+     in_window:
+       /* From now on we have got in-window packets */
+       conntrack->proto.tcp.last_index = index;
+
+       /* Only values available in this function are printed, so the
+        * statement still compiles when DEBUGP is enabled. */
+       DEBUGP("tcp_conntracks: sport=%hu dport=%hu "
+              "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+               ntohs(th->source), ntohs(th->dest),
+               (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+               (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+               old_state, new_state);
+
+       conntrack->proto.tcp.state = new_state;
+       if (old_state != new_state
+           && (new_state == TCP_CONNTRACK_FIN_WAIT
+               || new_state == TCP_CONNTRACK_CLOSE))
+               conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+       /* Cap the timeout once too many retransmissions have been seen. */
+       timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
+                 && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
+                 ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+       write_unlock_bh(&tcp_lock);
+
+       nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+       if (new_state != old_state)
+               nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
+       if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+               /* If only reply is a RST, we can consider ourselves not to
+                  have an established connection: this is a fairly common
+                  problem case, so we can delete the conntrack
+                  immediately.  --RR */
+               if (th->rst) {
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return NF_ACCEPT;
+               }
+       } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+                  && (old_state == TCP_CONNTRACK_SYN_RECV
+                      || old_state == TCP_CONNTRACK_ESTABLISHED)
+                  && new_state == TCP_CONNTRACK_ESTABLISHED) {
+               /* Set ASSURED if we see a valid ack in ESTABLISHED
+                  after SYN_RECV or a valid answer for a picked up
+                  connection. */
+               set_bit(IPS_ASSURED_BIT, &conntrack->status);
+               nf_conntrack_event_cache(IPCT_STATUS, skb);
+       }
+       nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+
+       return NF_ACCEPT;
+}
+/*
+ * Called when a new connection for this protocol is found.
+ *
+ * Initialises the per-direction window tracking state from the first
+ * packet seen.  Returns 1 to keep the new conntrack, or 0 when the packet
+ * cannot start a connection (invalid state, or a mid-stream packet while
+ * nf_ct_tcp_loose is 0).
+ */
+static int tcp_new(struct nf_conn *conntrack,
+                  const struct sk_buff *skb,
+                  unsigned int dataoff)
+{
+       struct ip_ct_tcp_state *snd = &conntrack->proto.tcp.seen[0];
+       struct ip_ct_tcp_state *rcv = &conntrack->proto.tcp.seen[1];
+       enum tcp_conntrack new_state;
+       struct tcphdr hdrbuf, *th;
+
+       th = skb_header_pointer(skb, dataoff, sizeof(hdrbuf), &hdrbuf);
+       BUG_ON(th == NULL);
+
+       /* Don't need lock here: this conntrack not in circulation yet */
+       new_state = tcp_conntracks[0][get_conntrack_index(th)]
+                                 [TCP_CONNTRACK_NONE];
+
+       /* Invalid: delete conntrack */
+       if (new_state >= TCP_CONNTRACK_MAX) {
+               DEBUGP("nf_ct_tcp: invalid new deleting.\n");
+               return 0;
+       }
+
+       /* Not a SYN: don't try to pick up connections unless allowed. */
+       if (new_state != TCP_CONNTRACK_SYN_SENT && nf_ct_tcp_loose == 0)
+               return 0;
+
+       /* Common to both the SYN and the pick-up case. */
+       snd->td_end = segment_seq_plus_len(ntohl(th->seq), skb->len,
+                                          dataoff, th);
+       snd->td_maxwin = ntohs(th->window);
+       if (snd->td_maxwin == 0)
+               snd->td_maxwin = 1;
+
+       if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               /* SYN packet */
+               snd->td_maxend = snd->td_end;
+
+               tcp_options(skb, dataoff, th, snd);
+               rcv->flags = 0;
+               snd->loose = rcv->loose = 0;
+       } else {
+               /*
+                * We are in the middle of a connection,
+                * its history is lost for us.
+                * Let's try to use the data from the packet.
+                */
+               snd->td_maxend = snd->td_end + snd->td_maxwin;
+               snd->td_scale = 0;
+
+               /* We assume SACK. Should we assume window scaling too? */
+               snd->flags = rcv->flags = IP_CT_TCP_FLAG_SACK_PERM;
+               snd->loose = rcv->loose = nf_ct_tcp_loose;
+       }
+
+       rcv->td_end = 0;
+       rcv->td_maxend = 0;
+       rcv->td_maxwin = 1;
+       rcv->td_scale = 0;
+
+       /* tcp_packet will set them */
+       conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
+       conntrack->proto.tcp.last_index = TCP_NONE_SET;
+
+       DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               snd->td_end, snd->td_maxend, snd->td_maxwin,
+               snd->td_scale,
+               rcv->td_end, rcv->td_maxend, rcv->td_maxwin,
+               rcv->td_scale);
+       return 1;
+}
+  
+/* Conntrack protocol descriptor for TCP over IPv4 (PF_INET).  Identical to
+ * the IPv6 variant except for .l3proto and the checksum-aware .error hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
+{
+       .l3proto                = PF_INET,
+       .proto                  = IPPROTO_TCP,
+       .name                   = "tcp",
+       .pkt_to_tuple           = tcp_pkt_to_tuple,
+       .invert_tuple           = tcp_invert_tuple,
+       .print_tuple            = tcp_print_tuple,
+       .print_conntrack        = tcp_print_conntrack,
+       .packet                 = tcp_packet,
+       .new                    = tcp_new,
+       .error                  = tcp_error4,
+};
+
+/* Conntrack protocol descriptor for TCP over IPv6 (PF_INET6).  Identical to
+ * the IPv4 variant except for .l3proto and the checksum-aware .error hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
+{
+       .l3proto                = PF_INET6,
+       .proto                  = IPPROTO_TCP,
+       .name                   = "tcp",
+       .pkt_to_tuple           = tcp_pkt_to_tuple,
+       .invert_tuple           = tcp_invert_tuple,
+       .print_tuple            = tcp_print_tuple,
+       .print_conntrack        = tcp_print_conntrack,
+       .packet                 = tcp_packet,
+       .new                    = tcp_new,
+       .error                  = tcp_error6,
+};
+
+/* Both TCP descriptors are exported for use outside this file. */
+EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
+EXPORT_SYMBOL(nf_conntrack_protocol_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
new file mode 100644 (file)
index 0000000..3cae7ce
--- /dev/null
@@ -0,0 +1,216 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - enable working with Layer 3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+
+/* Timeouts in jiffies: udp_packet() applies the first one while no reply
+ * has been seen and the second one once traffic was seen both ways. */
+unsigned long nf_ct_udp_timeout = 30*HZ;
+unsigned long nf_ct_udp_timeout_stream = 180*HZ;
+
+/* Fill the UDP part of @tuple (source and destination port, kept in the
+ * byte order found in the header) from the packet at @dataoff.
+ * Returns 1 on success, 0 if the UDP header cannot be read. */
+static int udp_pkt_to_tuple(const struct sk_buff *skb,
+                            unsigned int dataoff,
+                            struct nf_conntrack_tuple *tuple)
+{
+       struct udphdr hdrbuf;
+       const struct udphdr *uh;
+
+       /* Actually only need first 8 bytes. */
+       uh = skb_header_pointer(skb, dataoff, sizeof(hdrbuf), &hdrbuf);
+       if (!uh)
+               return 0;
+
+       tuple->src.u.udp.port = uh->source;
+       tuple->dst.u.udp.port = uh->dest;
+
+       return 1;
+}
+
+/* Build the reply-direction UDP ports in @tuple by swapping the source and
+ * destination ports of @orig.  Always returns 1. */
+static int udp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                           const struct nf_conntrack_tuple *orig)
+{
+       tuple->src.u.udp.port = orig->dst.u.udp.port;
+       tuple->dst.u.udp.port = orig->src.u.udp.port;
+       return 1;
+}
+
+/* Print out the per-protocol part of the tuple: the source and destination
+ * ports in host byte order.  Returns whatever seq_printf() returns. */
+static int udp_print_tuple(struct seq_file *s,
+                          const struct nf_conntrack_tuple *tuple)
+{
+       return seq_printf(s, "sport=%hu dport=%hu ",
+                         ntohs(tuple->src.u.udp.port),
+                         ntohs(tuple->dst.u.udp.port));
+}
+
+/* Print out the private part of the conntrack.  Nothing is printed for UDP;
+ * always returns 0. */
+static int udp_print_conntrack(struct seq_file *s,
+                              const struct nf_conn *conntrack)
+{
+       return 0;
+}
+
+/* Refresh the timeout of a UDP conntrack for one packet and return the
+ * verdict, which is always NF_ACCEPT.  Connections that have seen a reply
+ * get the longer stream timeout and are marked ASSURED. */
+static int udp_packet(struct nf_conn *conntrack,
+                     const struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       /* One-way traffic so far: keep the short timeout. */
+       if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+               nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout);
+               return NF_ACCEPT;
+       }
+
+       /* If we've seen traffic both ways, this is some kind of UDP
+          stream.  Extend timeout. */
+       nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout_stream);
+
+       /* Also, more likely to be important, and not a probe */
+       if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+               nf_conntrack_event_cache(IPCT_STATUS, skb);
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found.  UDP needs no
+ * per-connection setup here; always returns 1. */
+static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
+                  unsigned int dataoff)
+{
+       return 1;
+}
+
+/*
+ * Sanity-check a UDP datagram before it is tracked.
+ *
+ * Checks, in order: that the UDP header is present, that the length field
+ * is at least a header and not larger than the data available, and - for
+ * packets carrying a checksum on PRE_ROUTING - that the checksum is good.
+ * @csum is the address-family specific checksum helper; a nonzero result
+ * from it means the checksum is bad.
+ *
+ * Returns NF_ACCEPT if the packet looks sane, -NF_ACCEPT otherwise.
+ */
+static int udp_error(struct sk_buff *skb, unsigned int dataoff,
+                    enum ip_conntrack_info *ctinfo,
+                    int pf,
+                    unsigned int hooknum,
+                    int (*csum)(const struct sk_buff *, unsigned int))
+{
+       unsigned int avail = skb->len - dataoff;
+       unsigned int claimed;
+       struct udphdr hdrbuf, *uh;
+       int on_input;
+
+       /* Header is too small? */
+       uh = skb_header_pointer(skb, dataoff, sizeof(hdrbuf), &hdrbuf);
+       if (uh == NULL) {
+               if (LOG_INVALID(IPPROTO_UDP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_udp: short packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Truncated/malformed packets */
+       claimed = ntohs(uh->len);
+       if (claimed > avail || claimed < sizeof(*uh)) {
+               if (LOG_INVALID(IPPROTO_UDP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                               "nf_ct_udp: truncated/malformed packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Packet with no checksum */
+       if (uh->check == 0)
+               return NF_ACCEPT;
+
+       /* Checksum invalid? Ignore.
+        * We skip checking packets on the outgoing path
+        * because the semantic of CHECKSUM_HW is different there
+        * and moreover root might send raw packets.
+        * FIXME: Source route IP option packets --RR */
+       on_input = (pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
+                  (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING);
+       if (on_input
+           && skb->ip_summed != CHECKSUM_UNNECESSARY
+           && csum(skb, dataoff)) {
+               if (LOG_INVALID(IPPROTO_UDP))
+                       nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+                               "nf_ct_udp: bad UDP checksum ");
+               return -NF_ACCEPT;
+       }
+
+       return NF_ACCEPT;
+}
+
+/* UDP checksum helper for IPv4: feeds the IPv4 source/destination addresses,
+ * the datagram length and either the hardware-provided sum (CHECKSUM_HW) or
+ * a software sum of the datagram into csum_tcpudp_magic().  udp_error()
+ * treats a nonzero result as a bad checksum. */
+static int csum4(const struct sk_buff *skb, unsigned int dataoff)
+{
+       unsigned int dgramlen = skb->len - dataoff;
+
+       return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+                                dgramlen, IPPROTO_UDP,
+                                skb->ip_summed == CHECKSUM_HW
+                                ? skb->csum
+                                : skb_checksum(skb, dataoff, dgramlen, 0));
+}
+
+/* UDP checksum helper for IPv6: same as csum4() but uses the IPv6 header
+ * addresses and csum_ipv6_magic().  udp_error() treats a nonzero result as
+ * a bad checksum. */
+static int csum6(const struct sk_buff *skb, unsigned int dataoff)
+{
+       unsigned int dgramlen = skb->len - dataoff;
+
+       return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+                              dgramlen, IPPROTO_UDP,
+                              skb->ip_summed == CHECKSUM_HW
+                              ? skb->csum
+                              : skb_checksum(skb, dataoff, dgramlen, 0));
+}
+
+/* .error hook of the IPv4 UDP protocol descriptor: runs the common
+ * udp_error() checks with the IPv4 checksum helper csum4(). */
+static int udp_error4(struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info *ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4);
+}
+
+/* .error hook of the IPv6 UDP protocol descriptor: runs the common
+ * udp_error() checks with the IPv6 checksum helper csum6(). */
+static int udp_error6(struct sk_buff *skb,
+                     unsigned int dataoff,
+                     enum ip_conntrack_info *ctinfo,
+                     int pf,
+                     unsigned int hooknum)
+{
+       return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6);
+}
+
+/* Conntrack protocol descriptor for UDP over IPv4 (PF_INET).  Identical to
+ * the IPv6 variant except for .l3proto and the checksum-aware .error hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
+{
+       .l3proto                = PF_INET,
+       .proto                  = IPPROTO_UDP,
+       .name                   = "udp",
+       .pkt_to_tuple           = udp_pkt_to_tuple,
+       .invert_tuple           = udp_invert_tuple,
+       .print_tuple            = udp_print_tuple,
+       .print_conntrack        = udp_print_conntrack,
+       .packet                 = udp_packet,
+       .new                    = udp_new,
+       .error                  = udp_error4,
+};
+
+/* Conntrack protocol descriptor for UDP over IPv6 (PF_INET6).  Identical to
+ * the IPv4 variant except for .l3proto and the checksum-aware .error hook. */
+struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
+{
+       .l3proto                = PF_INET6,
+       .proto                  = IPPROTO_UDP,
+       .name                   = "udp",
+       .pkt_to_tuple           = udp_pkt_to_tuple,
+       .invert_tuple           = udp_invert_tuple,
+       .print_tuple            = udp_print_tuple,
+       .print_conntrack        = udp_print_conntrack,
+       .packet                 = udp_packet,
+       .new                    = udp_new,
+       .error                  = udp_error6,
+};
+
+/* Both UDP descriptors are exported for use outside this file. */
+EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
+EXPORT_SYMBOL(nf_conntrack_protocol_udp6);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
new file mode 100644 (file)
index 0000000..45224db
--- /dev/null
@@ -0,0 +1,869 @@
+/* This file contains all the functions required for the standalone
+   nf_conntrack module.
+
+   These are not required by the compatibility layer.
+*/
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *     - generalize L3 protocol dependent part.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+/* Lock assertions expand to nothing in this file.  They are defined ahead
+ * of the includes below - presumably because one of those headers
+ * (listhelp.h?) expects them; TODO confirm. */
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+/* Debug output switch: change "#if 0" to "#if 1" to route DEBUGP() to
+ * printk; otherwise DEBUGP() expands to nothing. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_LICENSE("GPL");
+
+/* Objects defined outside this file: the conntrack counter and the
+ * per-CPU statistics block. */
+extern atomic_t nf_conntrack_count;
+DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
+
+/* Match helper: nonzero when the original-direction tuple of conntrack @i
+ * belongs to the layer 3 protocol described by @data, which must point to
+ * a struct nf_conntrack_l3proto.  Presumably used to select the conntracks
+ * to remove when that protocol goes away - confirm against the caller. */
+static int kill_l3proto(struct nf_conn *i, void *data)
+{
+       const struct nf_conntrack_l3proto *l3 = data;
+       const struct nf_conntrack_tuple *orig =
+               &i->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+       return orig->src.l3num == l3->l3proto;
+}
+
+/* Match helper: nonzero when the original-direction tuple of conntrack @i
+ * has both the layer 4 protocol number and the layer 3 protocol of the
+ * struct nf_conntrack_protocol that @data points to. */
+static int kill_proto(struct nf_conn *i, void *data)
+{
+       const struct nf_conntrack_protocol *wanted = data;
+       const struct nf_conntrack_tuple *orig =
+               &i->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+       if (orig->dst.protonum != wanted->proto)
+               return 0;
+       return orig->src.l3num == wanted->l3proto;
+}
+
+#ifdef CONFIG_PROC_FS
+/* Print @tuple using first the layer 3 and then the layer 4 printer.
+ * Returns 1 as soon as one of them reports failure (the layer 4 printer is
+ * then not called if the layer 3 one failed), 0 otherwise. */
+static int
+print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+           struct nf_conntrack_l3proto *l3proto,
+           struct nf_conntrack_protocol *proto)
+{
+       if (l3proto->print_tuple(s, tuple))
+               return 1;
+
+       return proto->print_tuple(s, tuple) ? 1 : 0;
+}
+
+/* Print the packet and byte counters of one direction.  Without
+ * CONFIG_NF_CT_ACCT this is a macro that evaluates to 0, so callers can
+ * test the result unconditionally. */
+#ifdef CONFIG_NF_CT_ACCT
+static unsigned int
+seq_print_counters(struct seq_file *s,
+                  const struct ip_conntrack_counter *counter)
+{
+       return seq_printf(s, "packets=%llu bytes=%llu ",
+                         (unsigned long long)counter->packets,
+                         (unsigned long long)counter->bytes);
+}
+#else
+#define seq_print_counters(x, y)       0
+#endif
+
+/* Per-open iterator state of the conntrack listing, stored in
+ * seq_file->private by ct_open(). */
+struct ct_iter_state {
+       /* index into nf_conntrack_hash[] of the chain being walked */
+       unsigned int bucket;
+};
+
+/* Return the first entry of the first non-empty hash chain, leaving
+ * st->bucket at that chain, or NULL if every chain is empty. */
+static struct list_head *ct_get_first(struct seq_file *seq)
+{
+       struct ct_iter_state *st = seq->private;
+
+       st->bucket = 0;
+       while (st->bucket < nf_conntrack_htable_size) {
+               struct list_head *chain = &nf_conntrack_hash[st->bucket];
+
+               if (!list_empty(chain))
+                       return chain->next;
+               st->bucket++;
+       }
+       return NULL;
+}
+
+/* Return the entry following @head, moving on to later hash chains when
+ * the end of the current one is reached; NULL after the last chain. */
+static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+{
+       struct ct_iter_state *st = seq->private;
+       struct list_head *cur = head->next;
+
+       for (;;) {
+               /* Anything but the chain head itself is a real entry. */
+               if (cur != &nf_conntrack_hash[st->bucket])
+                       return cur;
+               if (++st->bucket >= nf_conntrack_htable_size)
+                       return NULL;
+               cur = nf_conntrack_hash[st->bucket].next;
+       }
+}
+
+/* Return the entry at position @pos (0 = first entry) of the whole table,
+ * or NULL if the table holds fewer entries. */
+static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+       struct list_head *cur = ct_get_first(seq);
+
+       while (cur != NULL && pos != 0) {
+               cur = ct_get_next(seq, cur);
+               if (cur != NULL)
+                       pos--;
+       }
+       return pos ? NULL : cur;
+}
+
+/* seq_file .start: take nf_conntrack_lock for reading (released again in
+ * ct_seq_stop()) and position the iterator at entry *pos. */
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       read_lock_bh(&nf_conntrack_lock);
+       return ct_get_idx(seq, *pos);
+}
+
+/* seq_file .next: bump the position and return the entry after @v, or
+ * NULL at the end of the table. */
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       (*pos)++;
+       return ct_get_next(s, v);
+}
+
+/* seq_file .stop: drop the read lock taken in ct_seq_start(). */
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
+       read_unlock_bh(&nf_conntrack_lock);
+}
+
+/*
+ * seq_file .show: print one conntrack entry as a single line.
+ *
+ * @v is the tuple hash entry handed out by the iterator.  Reply-direction
+ * hash entries are skipped so that every conntrack is listed once.
+ *
+ * Returns 0 on success (or when the entry is skipped) and -ENOSPC as soon
+ * as one of the print helpers reports failure.
+ */
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+       const struct nf_conntrack_tuple_hash *hash = v;
+       const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash);
+       struct nf_conntrack_l3proto *l3proto;
+       struct nf_conntrack_protocol *proto;
+
+       ASSERT_READ_LOCK(&nf_conntrack_lock);
+       NF_CT_ASSERT(conntrack);
+
+       /* we only want to print DIR_ORIGINAL */
+       if (NF_CT_DIRECTION(hash))
+               return 0;
+
+       l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+                                    .tuple.src.l3num);
+
+       NF_CT_ASSERT(l3proto);
+       proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+                                .tuple.src.l3num,
+                                conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+                                .tuple.dst.protonum);
+       NF_CT_ASSERT(proto);
+
+       /* protocol names/numbers and remaining lifetime in seconds
+        * (0 if the timer is not pending) */
+       if (seq_printf(s, "%-8s %u %-8s %u %ld ",
+                      l3proto->name,
+                      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
+                      proto->name,
+                      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+                      timer_pending(&conntrack->timeout)
+                      ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
+               return -ENOSPC;
+
+       /* protocol private state, layer 3 first */
+       if (l3proto->print_conntrack(s, conntrack))
+               return -ENOSPC;
+
+       if (proto->print_conntrack(s, conntrack))
+               return -ENOSPC;
+
+       /* original direction: tuple, then counters */
+       if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                       l3proto, proto))
+               return -ENOSPC;
+
+       if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
+               return -ENOSPC;
+
+       if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
+               if (seq_printf(s, "[UNREPLIED] "))
+                       return -ENOSPC;
+
+       /* reply direction: tuple, then counters */
+       if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
+                       l3proto, proto))
+               return -ENOSPC;
+
+       if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
+               return -ENOSPC;
+
+       if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
+               if (seq_printf(s, "[ASSURED] "))
+                       return -ENOSPC;
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+       if (seq_printf(s, "mark=%u ", conntrack->mark))
+               return -ENOSPC;
+#endif
+
+       /* reference count; also terminates the line */
+       if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
+               return -ENOSPC;
+
+       return 0;
+}
+
+/* seq_file iterator callbacks for the conntrack listing. */
+static struct seq_operations ct_seq_ops = {
+       .start = ct_seq_start,
+       .next  = ct_seq_next,
+       .stop  = ct_seq_stop,
+       .show  = ct_seq_show
+};
+
+/* Open handler of the conntrack listing: allocates a zeroed iterator state
+ * and attaches it to the seq_file as its private data.
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
+ * error reported by seq_open(). */
+static int ct_open(struct inode *inode, struct file *file)
+{
+       struct ct_iter_state *state;
+       int err;
+
+       state = kmalloc(sizeof(*state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+       memset(state, 0, sizeof(*state));
+
+       err = seq_open(file, &ct_seq_ops);
+       if (err) {
+               kfree(state);
+               return err;
+       }
+
+       /* seq_open() stored the seq_file in file->private_data */
+       ((struct seq_file *)file->private_data)->private = state;
+       return err;
+}
+
+/* File operations of the conntrack listing.  seq_release_private is used
+ * as release handler - presumably so that the iterator state allocated in
+ * ct_open() is freed on close. */
+static struct file_operations ct_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = ct_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_private,
+};
+
+/* expects */
+
+/* seq_file .start for the expectation list: takes nf_conntrack_lock for
+ * reading and returns the entry at position *pos, or NULL if the list is
+ * empty or shorter than that. */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+       struct list_head *cur = &nf_conntrack_expect_list;
+       loff_t walked = 0;
+
+       /* strange seq_file api calls stop even if we fail,
+        * thus we need to grab lock since stop unlocks */
+       read_lock_bh(&nf_conntrack_lock);
+
+       if (list_empty(cur))
+               return NULL;
+
+       /* Step *pos + 1 times from the list head. */
+       while (walked <= *pos) {
+               cur = cur->next;
+               if (cur == &nf_conntrack_expect_list)
+                       return NULL;
+               walked++;
+       }
+       return cur;
+}
+
+/*
+ * seq_file ->next for the expectation list: bumps *pos and returns the
+ * node following @v, or NULL once the walk is back at the list head.
+ */
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct list_head *following = ((struct list_head *)v)->next;
+
+       ++*pos;
+
+       return (following == &nf_conntrack_expect_list) ? NULL : following;
+}
+
+/* seq_file ->stop: drops the nf_conntrack_lock read lock that
+ * exp_seq_start() took. */
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
+       read_unlock_bh(&nf_conntrack_lock);
+}
+
+/*
+ * seq_file ->show for one expectation: prints the remaining timeout in
+ * seconds ("-" when the expectation has no timer function, 0 when the
+ * timer is not pending), the l3/l4 protocol numbers and the tuple.
+ *
+ * Returns the result of the final seq_putc().
+ */
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+       struct nf_conntrack_expect *exp = v;
+       long secs_left = 0;
+
+       if (!exp->timeout.function) {
+               seq_printf(s, "- ");
+       } else {
+               if (timer_pending(&exp->timeout))
+                       secs_left = (long)(exp->timeout.expires - jiffies)/HZ;
+               seq_printf(s, "%ld ", secs_left);
+       }
+
+       seq_printf(s, "l3proto = %u proto=%u ",
+                  exp->tuple.src.l3num,
+                  exp->tuple.dst.protonum);
+
+       print_tuple(s, &exp->tuple,
+                   nf_ct_find_l3proto(exp->tuple.src.l3num),
+                   nf_ct_find_proto(exp->tuple.src.l3num,
+                                    exp->tuple.dst.protonum));
+
+       return seq_putc(s, '\n');
+}
+
+/* seq_file iterator callbacks for the expectation list; exp_open()
+ * hands this table to seq_open(). */
+static struct seq_operations exp_seq_ops = {
+       .start = exp_seq_start,
+       .next = exp_seq_next,
+       .stop = exp_seq_stop,
+       .show = exp_seq_show
+};
+
+/* Open handler for the expectation listing; no private iterator state
+ * is allocated, so this just forwards to seq_open(). */
+static int exp_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &exp_seq_ops);
+}
+
+/* File operations for the expectation listing.  exp_open() attaches no
+ * private data, so plain seq_release is used. */
+static struct file_operations exp_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = exp_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release
+};
+
+/*
+ * seq_file ->start for the per-CPU statistics.  Position 0 is the
+ * header (SEQ_START_TOKEN); position N > 0 maps to CPU N - 1.  CPUs
+ * that are not possible are skipped and *pos is moved to just past the
+ * CPU that is returned.
+ *
+ * Returns the header token, the per-CPU stat block, or NULL when no
+ * possible CPU remains.
+ */
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       int cpu;
+
+       if (*pos == 0)
+               return SEQ_START_TOKEN;
+
+       cpu = *pos-1;
+       while (cpu < NR_CPUS) {
+               if (cpu_possible(cpu)) {
+                       *pos = cpu + 1;
+                       return &per_cpu(nf_conntrack_stat, cpu);
+               }
+               ++cpu;
+       }
+
+       return NULL;
+}
+
+/*
+ * seq_file ->next for the per-CPU statistics: starting at CPU *pos,
+ * returns the stat block of the first possible CPU and sets *pos to
+ * one past it.  Returns NULL when no possible CPU remains.
+ */
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       int cpu = *pos;
+
+       while (cpu < NR_CPUS) {
+               if (cpu_possible(cpu)) {
+                       *pos = cpu + 1;
+                       return &per_cpu(nf_conntrack_stat, cpu);
+               }
+               ++cpu;
+       }
+
+       return NULL;
+}
+
+/* seq_file ->stop: intentionally empty, ct_cpu_seq_start() takes no
+ * lock and allocates nothing. */
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file ->show for the per-CPU statistics.  For the start token the
+ * column header is printed; otherwise one line of hexadecimal counters
+ * for the given CPU, led by the global conntrack entry count.
+ *
+ * Always returns 0.
+ */
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+       unsigned int nr_conntracks;
+       struct ip_conntrack_stat *st;
+
+       if (v == SEQ_START_TOKEN) {
+               seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+               return 0;
+       }
+
+       st = v;
+       /* global count, repeated on every CPU line */
+       nr_conntracks = atomic_read(&nf_conntrack_count);
+
+       seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
+                       "%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+                  nr_conntracks,
+                  st->searched, st->found, st->new, st->invalid,
+                  st->ignore, st->delete, st->delete_list,
+                  st->insert, st->insert_failed, st->drop,
+                  st->early_drop, st->error,
+                  st->expect_new, st->expect_create, st->expect_delete);
+       return 0;
+}
+
+/* seq_file iterator callbacks for the per-CPU statistics;
+ * ct_cpu_seq_open() hands this table to seq_open(). */
+static struct seq_operations ct_cpu_seq_ops = {
+       .start  = ct_cpu_seq_start,
+       .next   = ct_cpu_seq_next,
+       .stop   = ct_cpu_seq_stop,
+       .show   = ct_cpu_seq_show,
+};
+
+/* Open handler for the per-CPU statistics file; forwards to seq_open()
+ * without attaching any private data. */
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &ct_cpu_seq_ops);
+}
+
+/* File operations for the per-CPU statistics file.  ct_cpu_seq_open()
+ * attaches no private data to the seq_file, so plain seq_release is the
+ * matching release hook (same as exp_file_ops). */
+static struct file_operations ct_cpu_seq_fops = {
+       .owner   = THIS_MODULE,
+       .open    = ct_cpu_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+#endif /* CONFIG_PROC_FS */
+
+/* Sysctl support */
+
+#ifdef CONFIG_SYSCTL
+
+/* From nf_conntrack_core.c */
+extern int nf_conntrack_max;
+extern unsigned int nf_conntrack_htable_size;
+
+/* From nf_conntrack_proto_tcp.c */
+extern unsigned long nf_ct_tcp_timeout_syn_sent;
+extern unsigned long nf_ct_tcp_timeout_syn_recv;
+extern unsigned long nf_ct_tcp_timeout_established;
+extern unsigned long nf_ct_tcp_timeout_fin_wait;
+extern unsigned long nf_ct_tcp_timeout_close_wait;
+extern unsigned long nf_ct_tcp_timeout_last_ack;
+extern unsigned long nf_ct_tcp_timeout_time_wait;
+extern unsigned long nf_ct_tcp_timeout_close;
+extern unsigned long nf_ct_tcp_timeout_max_retrans;
+extern int nf_ct_tcp_loose;
+extern int nf_ct_tcp_be_liberal;
+extern int nf_ct_tcp_max_retrans;
+
+/* From nf_conntrack_proto_udp.c */
+extern unsigned long nf_ct_udp_timeout;
+extern unsigned long nf_ct_udp_timeout_stream;
+
+/* From nf_conntrack_proto_generic.c */
+extern unsigned long nf_ct_generic_timeout;
+
+/* Log invalid packets of a given protocol */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+static struct ctl_table_header *nf_ct_sysctl_header;
+
+static ctl_table nf_ct_sysctl_table[] = {
+       {
+               .ctl_name       = NET_NF_CONNTRACK_MAX,
+               .procname       = "nf_conntrack_max",
+               .data           = &nf_conntrack_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_COUNT,
+               .procname       = "nf_conntrack_count",
+               .data           = &nf_conntrack_count,
+               .maxlen         = sizeof(int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_BUCKETS,
+               .procname       = "nf_conntrack_buckets",
+               .data           = &nf_conntrack_htable_size,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
+               .procname       = "nf_conntrack_tcp_timeout_syn_sent",
+               .data           = &nf_ct_tcp_timeout_syn_sent,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
+               .procname       = "nf_conntrack_tcp_timeout_syn_recv",
+               .data           = &nf_ct_tcp_timeout_syn_recv,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
+               .procname       = "nf_conntrack_tcp_timeout_established",
+               .data           = &nf_ct_tcp_timeout_established,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
+               .procname       = "nf_conntrack_tcp_timeout_fin_wait",
+               .data           = &nf_ct_tcp_timeout_fin_wait,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
+               .procname       = "nf_conntrack_tcp_timeout_close_wait",
+               .data           = &nf_ct_tcp_timeout_close_wait,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
+               .procname       = "nf_conntrack_tcp_timeout_last_ack",
+               .data           = &nf_ct_tcp_timeout_last_ack,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
+               .procname       = "nf_conntrack_tcp_timeout_time_wait",
+               .data           = &nf_ct_tcp_timeout_time_wait,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
+               .procname       = "nf_conntrack_tcp_timeout_close",
+               .data           = &nf_ct_tcp_timeout_close,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_UDP_TIMEOUT,
+               .procname       = "nf_conntrack_udp_timeout",
+               .data           = &nf_ct_udp_timeout,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
+               .procname       = "nf_conntrack_udp_timeout_stream",
+               .data           = &nf_ct_udp_timeout_stream,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_GENERIC_TIMEOUT,
+               .procname       = "nf_conntrack_generic_timeout",
+               .data           = &nf_ct_generic_timeout,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_LOG_INVALID,
+               .procname       = "nf_conntrack_log_invalid",
+               .data           = &nf_ct_log_invalid,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &log_invalid_proto_min,
+               .extra2         = &log_invalid_proto_max,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+               .procname       = "nf_conntrack_tcp_timeout_max_retrans",
+               .data           = &nf_ct_tcp_timeout_max_retrans,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_LOOSE,
+               .procname       = "nf_conntrack_tcp_loose",
+               .data           = &nf_ct_tcp_loose,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
+               .procname       = "nf_conntrack_tcp_be_liberal",
+               .data           = &nf_ct_tcp_be_liberal,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
+               .procname       = "nf_conntrack_tcp_max_retrans",
+               .data           = &nf_ct_tcp_max_retrans,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+
+       { .ctl_name = 0 }
+};
+
+/* NOTE(review): this defines NET_NF_CONNTRACK_MAX as 2089 after
+ * nf_ct_sysctl_table above has already used the name, so only the entry
+ * below sees 2089 -- presumably to give the top-level copy of
+ * nf_conntrack_max its own ctl number; confirm against
+ * include/linux/sysctl.h. */
+#define NET_NF_CONNTRACK_MAX 2089
+
+/* Second level of the sysctl tree: the "netfilter" directory holding
+ * nf_ct_sysctl_table, plus an nf_conntrack_max entry next to it that
+ * points at the same variable. */
+static ctl_table nf_ct_netfilter_table[] = {
+       {
+               .ctl_name       = NET_NETFILTER,
+               .procname       = "netfilter",
+               .mode           = 0555,
+               .child          = nf_ct_sysctl_table,
+       },
+       {
+               .ctl_name       = NET_NF_CONNTRACK_MAX,
+               .procname       = "nf_conntrack_max",
+               .data           = &nf_conntrack_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       { .ctl_name = 0 }
+};
+
+/* Root of the sysctl tree registered by init_or_cleanup(): the "net"
+ * directory whose children come from nf_ct_netfilter_table. */
+static ctl_table nf_ct_net_table[] = {
+       {
+               .ctl_name       = CTL_NET,
+               .procname       = "net",
+               .mode           = 0555,
+               .child          = nf_ct_netfilter_table,
+       },
+       { .ctl_name = 0 }
+};
+EXPORT_SYMBOL(nf_ct_log_invalid);
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * Common module setup/teardown.
+ *
+ * @init != 0: initialise conntrack, create the proc entries and
+ *             register the sysctl table; on any failure everything set
+ *             up so far is undone and a negative errno is returned.
+ * @init == 0: tear everything down in reverse order.
+ *
+ * Returns 0 on success (and always for teardown), the error from
+ * nf_conntrack_init(), or -ENOMEM if a proc entry or the sysctl table
+ * cannot be created.
+ */
+static int init_or_cleanup(int init)
+{
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+#endif
+       int ret = 0;
+
+       if (!init) goto cleanup;
+
+       ret = nf_conntrack_init();
+       if (ret < 0)
+               goto cleanup_nothing;
+
+#ifdef CONFIG_PROC_FS
+       /* Each failure below must set ret: it is still 0 from the
+        * successful nf_conntrack_init(), and returning 0 would report a
+        * successful module load after everything was torn down. */
+       proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
+       if (!proc) {
+               ret = -ENOMEM;
+               goto cleanup_init;
+       }
+
+       proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
+                                       &exp_file_ops);
+       if (!proc_exp) {
+               ret = -ENOMEM;
+               goto cleanup_proc;
+       }
+
+       proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
+       if (!proc_stat) {
+               ret = -ENOMEM;
+               goto cleanup_proc_exp;
+       }
+
+       proc_stat->proc_fops = &ct_cpu_seq_fops;
+       proc_stat->owner = THIS_MODULE;
+#endif
+#ifdef CONFIG_SYSCTL
+       nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+       if (nf_ct_sysctl_header == NULL) {
+               printk("nf_conntrack: can't register to sysctl.\n");
+               ret = -ENOMEM;
+               goto cleanup_proc_stat;
+       }
+#endif
+
+       return ret;
+
+ cleanup:
+#ifdef CONFIG_SYSCTL
+       unregister_sysctl_table(nf_ct_sysctl_header);
+ cleanup_proc_stat:
+#endif
+#ifdef CONFIG_PROC_FS
+       /* The stat entry was created as "nf_conntrack" under
+        * proc_net_stat, so it has to be removed from there as well. */
+       remove_proc_entry("nf_conntrack", proc_net_stat);
+ cleanup_proc_exp:
+       proc_net_remove("nf_conntrack_expect");
+ cleanup_proc:
+       proc_net_remove("nf_conntrack");
+ cleanup_init:
+#endif /* CONFIG_PROC_FS */
+       nf_conntrack_cleanup();
+ cleanup_nothing:
+       return ret;
+}
+
+/*
+ * Install @proto in the nf_ct_l3protos slot for proto->l3proto.
+ *
+ * Returns 0 on success, or -EBUSY if the slot no longer holds the
+ * generic l3proto placeholder.
+ */
+int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
+{
+       int ret = 0;
+
+       write_lock_bh(&nf_conntrack_lock);
+       if (nf_ct_l3protos[proto->l3proto] == &nf_conntrack_generic_l3proto)
+               nf_ct_l3protos[proto->l3proto] = proto;
+       else
+               ret = -EBUSY;
+       write_unlock_bh(&nf_conntrack_lock);
+
+       return ret;
+}
+
+/*
+ * Put the generic l3proto placeholder back into the slot for
+ * proto->l3proto, wait via synchronize_net(), then run kill_l3proto
+ * over the conntrack entries with @proto as argument.
+ */
+void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+{
+       write_lock_bh(&nf_conntrack_lock);
+       nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto;
+       write_unlock_bh(&nf_conntrack_lock);
+       
+       /* Somebody could be still looking at the proto in bh. */
+       synchronize_net();
+
+       /* Remove all conntrack entries for this protocol */
+       nf_ct_iterate_cleanup(kill_l3proto, proto);
+}
+
+/* FIXME: Allow NULL functions and sub in pointers to generic for
+   them. --RR */
+/*
+ * Install @proto in nf_ct_protos[proto->l3proto][proto->proto].
+ *
+ * If no per-l3proto array exists yet, one is allocated with the lock
+ * dropped (GFP_KERNEL allocation), filled with the generic protocol,
+ * published under the lock unless another caller got there first, and
+ * the registration is then retried.
+ *
+ * Returns 0 on success, -EBUSY if the slot no longer holds the generic
+ * protocol, or -ENOMEM if the array cannot be allocated.
+ */
+int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto)
+{
+       int ret = 0;
+
+retry:
+       write_lock_bh(&nf_conntrack_lock);
+       if (nf_ct_protos[proto->l3proto]) {
+               if (nf_ct_protos[proto->l3proto][proto->proto]
+                               != &nf_conntrack_generic_protocol) {
+                       ret = -EBUSY;
+                       goto out_unlock;
+               }
+       } else {
+               /* l3proto may be loaded later. */
+               struct nf_conntrack_protocol **proto_array;
+               int i;
+
+               /* drop the lock around the GFP_KERNEL allocation */
+               write_unlock_bh(&nf_conntrack_lock);
+
+               proto_array = (struct nf_conntrack_protocol **)
+                               kmalloc(MAX_NF_CT_PROTO *
+                                        sizeof(struct nf_conntrack_protocol *),
+                                       GFP_KERNEL);
+               if (proto_array == NULL) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               for (i = 0; i < MAX_NF_CT_PROTO; i++)
+                       proto_array[i] = &nf_conntrack_generic_protocol;
+
+               write_lock_bh(&nf_conntrack_lock);
+               if (nf_ct_protos[proto->l3proto]) {
+                       /* bad timing, but no problem */
+                       write_unlock_bh(&nf_conntrack_lock);
+                       kfree(proto_array);
+               } else {
+                       nf_ct_protos[proto->l3proto] = proto_array;
+                       write_unlock_bh(&nf_conntrack_lock);
+               }
+
+               /*
+                * Just once because array is never freed until unloading
+                * nf_conntrack.ko
+                */
+               goto retry;
+       }
+
+       nf_ct_protos[proto->l3proto][proto->proto] = proto;
+
+out_unlock:
+       write_unlock_bh(&nf_conntrack_lock);
+out:
+       return ret;
+}
+
+/*
+ * Put the generic protocol back into the slot for
+ * proto->l3proto/proto->proto, wait via synchronize_net(), then run
+ * kill_proto over the conntrack entries with @proto as argument.
+ */
+void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto)
+{
+       write_lock_bh(&nf_conntrack_lock);
+       nf_ct_protos[proto->l3proto][proto->proto]
+               = &nf_conntrack_generic_protocol;
+       write_unlock_bh(&nf_conntrack_lock);
+       
+       /* Somebody could be still looking at the proto in bh. */
+       synchronize_net();
+
+       /* Remove all conntrack entries for this protocol */
+       nf_ct_iterate_cleanup(kill_proto, proto);
+}
+
+/* Module entry point: runs the setup half of init_or_cleanup(). */
+static int __init init(void)
+{
+       return init_or_cleanup(1);
+}
+
+/* Module exit point: runs the teardown half of init_or_cleanup(). */
+static void __exit fini(void)
+{
+       init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+/* Some modules need us, but don't depend directly on any symbol.
+   They should call this.  The body is intentionally empty: the call
+   exists only so that such a module references an exported symbol. */
+void need_nf_conntrack(void)
+{
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
+EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+#endif
+EXPORT_SYMBOL(nf_conntrack_l3proto_register);
+EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
+EXPORT_SYMBOL(nf_conntrack_protocol_register);
+EXPORT_SYMBOL(nf_conntrack_protocol_unregister);
+EXPORT_SYMBOL(nf_ct_invert_tuplepr);
+EXPORT_SYMBOL(nf_conntrack_alter_reply);
+EXPORT_SYMBOL(nf_conntrack_destroyed);
+EXPORT_SYMBOL(need_nf_conntrack);
+EXPORT_SYMBOL(nf_conntrack_helper_register);
+EXPORT_SYMBOL(nf_conntrack_helper_unregister);
+EXPORT_SYMBOL(nf_ct_iterate_cleanup);
+EXPORT_SYMBOL(__nf_ct_refresh_acct);
+EXPORT_SYMBOL(nf_ct_protos);
+EXPORT_SYMBOL(nf_ct_find_proto);
+EXPORT_SYMBOL(nf_ct_l3protos);
+EXPORT_SYMBOL(nf_conntrack_expect_alloc);
+EXPORT_SYMBOL(nf_conntrack_expect_put);
+EXPORT_SYMBOL(nf_conntrack_expect_related);
+EXPORT_SYMBOL(nf_conntrack_unexpect_related);
+EXPORT_SYMBOL(nf_conntrack_tuple_taken);
+EXPORT_SYMBOL(nf_conntrack_htable_size);
+EXPORT_SYMBOL(nf_conntrack_lock);
+EXPORT_SYMBOL(nf_conntrack_hash);
+EXPORT_SYMBOL(nf_conntrack_untracked);
+EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+EXPORT_SYMBOL(nf_conntrack_tcp_update);
+#endif
+EXPORT_SYMBOL(__nf_conntrack_confirm);
+EXPORT_SYMBOL(nf_ct_get_tuple);
+EXPORT_SYMBOL(nf_ct_invert_tuple);
+EXPORT_SYMBOL(nf_conntrack_in);
+EXPORT_SYMBOL(__nf_conntrack_attach);
index 39d9c2dcd03cfc9f48c927852a303a716c009b6c..e3589c2de49e9b5e6c76c1d4a4e02a58dda5dd46 100644 (file)
@@ -2,4 +2,4 @@
 # Makefile for the netlink driver.
 #
 
-obj-y                                  := af_netlink.o
+obj-y                                  := af_netlink.o attr.o genetlink.o
index 5ca283537bc66e9344c2e0295b6139d0628454a9..8c38ee6d255eb04aa2ddab13b8d3e448b71dec53 100644 (file)
@@ -58,6 +58,7 @@
 
 #include <net/sock.h>
 #include <net/scm.h>
+#include <net/netlink.h>
 
 #define Nprintk(a...)
 #define NLGRPSZ(x)     (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -427,7 +428,8 @@ static int netlink_release(struct socket *sock)
 
        spin_lock(&nlk->cb_lock);
        if (nlk->cb) {
-               nlk->cb->done(nlk->cb);
+               if (nlk->cb->done)
+                       nlk->cb->done(nlk->cb);
                netlink_destroy_callback(nlk->cb);
                nlk->cb = NULL;
        }
@@ -1322,7 +1324,8 @@ static int netlink_dump(struct sock *sk)
        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, skb->len);
 
-       cb->done(cb);
+       if (cb->done)
+               cb->done(cb);
        nlk->cb = NULL;
        spin_unlock(&nlk->cb_lock);
 
@@ -1409,6 +1412,94 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
 
+/*
+ * Walk all netlink messages in @skb and invoke @cb for each of them.
+ *
+ * @cb receives the skb, the message header and a pointer to an error
+ * variable.  If it returns < 0 with the error set to 0, processing is
+ * interrupted, the message is left in the skb and -1 is returned.  If
+ * it returns < 0 with a non-zero error, that error is acked to the
+ * sender.  On success an ack is sent only if NLM_F_ACK was requested.
+ *
+ * Returns 0 when the skb has been consumed (including when a truncated
+ * or malformed header stops the walk), -1 when @cb interrupted it.
+ */
+static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
+                                                    struct nlmsghdr *, int *))
+{
+       unsigned int total_len;
+       struct nlmsghdr *nlh;
+       int err;
+
+       while (skb->len >= nlmsg_total_size(0)) {
+               nlh = (struct nlmsghdr *) skb->data;
+
+               /* A length below the header size is bogus; with
+                * nlmsg_len == 0 the skb_pull() below would pull nothing
+                * and this loop would never advance. */
+               if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+                   skb->len < nlh->nlmsg_len)
+                       return 0;
+
+               total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
+
+               if (cb(skb, nlh, &err) < 0) {
+                       /* Not an error, but we have to interrupt processing
+                        * here. Note: that in this case we do not pull
+                        * message from skb, it will be processed later.
+                        */
+                       if (err == 0)
+                               return -1;
+                       netlink_ack(skb, nlh, err);
+               } else if (nlh->nlmsg_flags & NLM_F_ACK)
+                       netlink_ack(skb, nlh, 0);
+
+               skb_pull(skb, total_len);
+       }
+
+       return 0;
+}
+
+/**
+ * netlink_run_queue - Process netlink receive queue.
+ * @sk: Netlink socket containing the queue
+ * @qlen: Place to store queue length upon entry
+ * @cb: Callback function invoked for each netlink message found
+ *
+ * Processes as much as there was in the queue upon entry and invokes
+ * a callback function for each netlink message found. The callback
+ * function may refuse a message by returning a negative error code
+ * but setting the error pointer to 0 in which case this function
+ * returns with a qlen != 0.
+ *
+ * qlen must be initialized to 0 before the initial entry, afterwards
+ * the function may be called repeatedly until qlen reaches 0.
+ */
+void netlink_run_queue(struct sock *sk, unsigned int *qlen,
+                      int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
+{
+       struct sk_buff *skb;
+
+       /* First entry (0) or stale value: resample the current length */
+       if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
+               *qlen = skb_queue_len(&sk->sk_receive_queue);
+
+       for (; *qlen; (*qlen)--) {
+               skb = skb_dequeue(&sk->sk_receive_queue);
+               if (netlink_rcv_skb(skb, cb)) {
+                       /* Processing was interrupted: put back what is
+                        * left of the skb, or drop it if nothing is. */
+                       if (skb->len)
+                               skb_queue_head(&sk->sk_receive_queue, skb);
+                       else {
+                               kfree_skb(skb);
+                               (*qlen)--;
+                       }
+                       break;
+               }
+
+               kfree_skb(skb);
+       }
+}
+
+/**
+ * netlink_queue_skip - Skip netlink message while processing queue.
+ * @nlh: Netlink message to be skipped
+ * @skb: Socket buffer containing the netlink messages.
+ *
+ * Pulls the given netlink message (its aligned length, capped at the
+ * amount of data left in the skb) off the socket buffer so the next
+ * call to netlink_run_queue() will not reconsider the message.
+ */
+void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
+{
+       int aligned = NLMSG_ALIGN(nlh->nlmsg_len);
+
+       skb_pull(skb, (aligned > skb->len) ? skb->len : aligned);
+}
 
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
@@ -1657,6 +1748,8 @@ out:
 core_initcall(netlink_proto_init);
 
 EXPORT_SYMBOL(netlink_ack);
+EXPORT_SYMBOL(netlink_run_queue);
+EXPORT_SYMBOL(netlink_queue_skip);
 EXPORT_SYMBOL(netlink_broadcast);
 EXPORT_SYMBOL(netlink_dump_start);
 EXPORT_SYMBOL(netlink_kernel_create);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
new file mode 100644 (file)
index 0000000..fffef4a
--- /dev/null
@@ -0,0 +1,328 @@
+/*
+ * NETLINK      Netlink attributes
+ *
+ *             Authors:        Thomas Graf <tgraf@suug.ch>
+ *                             Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+
+/* Default minimum payload length per attribute type; validate_nla()
+ * uses it when the policy entry gives no minlen of its own.  Types not
+ * listed here default to 0. */
+static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
+       [NLA_U8]        = sizeof(u8),
+       [NLA_U16]       = sizeof(u16),
+       [NLA_U32]       = sizeof(u32),
+       [NLA_U64]       = sizeof(u64),
+       [NLA_STRING]    = 1,
+       [NLA_NESTED]    = NLA_HDRLEN,
+};
+
+/*
+ * Check a single attribute against its policy entry.
+ *
+ * Attributes whose type is 0 or above @maxtype are accepted unchecked.
+ * NLA_FLAG attributes must carry no payload; every other attribute must
+ * be at least as long as the policy's minlen or, when that is 0, the
+ * per-type default from nla_attr_minlen.
+ *
+ * Returns 0 if the attribute is acceptable, -ERANGE otherwise.
+ */
+static int validate_nla(struct nlattr *nla, int maxtype,
+                       struct nla_policy *policy)
+{
+       struct nla_policy *pt;
+       int attrlen, minlen;
+
+       if (nla->nla_type <= 0 || nla->nla_type > maxtype)
+               return 0;
+
+       pt = policy + nla->nla_type;
+
+       BUG_ON(pt->type > NLA_TYPE_MAX);
+
+       attrlen = nla_len(nla);
+
+       if (pt->type == NLA_FLAG && attrlen > 0)
+               return -ERANGE;
+
+       if (pt->minlen)
+               minlen = pt->minlen;
+       else if (pt->type != NLA_UNSPEC)
+               minlen = nla_attr_minlen[pt->type];
+       else
+               minlen = 0;
+
+       return (attrlen < minlen) ? -ERANGE : 0;
+}
+
+/**
+ * nla_validate - Validate a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * Validates all attributes in the specified attribute stream against the
+ * specified policy. Attributes with a type exceeding maxtype will be
+ * ignored. See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int nla_validate(struct nlattr *head, int len, int maxtype,
+                struct nla_policy *policy)
+{
+       struct nlattr *nla;
+       int rem;
+
+       nla_for_each_attr(nla, head, len, rem) {
+               int err = validate_nla(nla, maxtype, policy);
+
+               /* stop at the first attribute that fails its policy */
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+/**
+ * nla_parse - Parse a stream of attributes into a tb buffer
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @policy: validation policy, or NULL to skip validation
+ *
+ * Parses a stream of attributes and stores a pointer to each attribute in
+ * the tb array, indexed by attribute type. Attributes with a type of 0 or
+ * exceeding maxtype are silently ignored for backwards compatibility
+ * reasons. If a type occurs more than once, the last occurrence wins.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+             struct nla_policy *policy)
+{
+       struct nlattr *nla;
+       int rem, err;
+
+       memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+       nla_for_each_attr(nla, head, len, rem) {
+               u16 type = nla->nla_type;
+
+               if (type == 0 || type > maxtype)
+                       continue;
+
+               if (policy) {
+                       err = validate_nla(nla, maxtype, policy);
+                       if (err < 0)
+                               return err;
+               }
+
+               tb[type] = nla;
+       }
+
+       /* trailing bytes that do not form a complete attribute */
+       if (unlikely(rem > 0))
+               printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
+                      "attributes.\n", rem);
+
+       return 0;
+}
+
+/**
+ * nla_find - Find a specific attribute in a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute in the stream matching the specified type,
+ * or NULL if there is none.
+ */
+struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
+{
+       struct nlattr *nla;
+       int rem;
+
+       nla_for_each_attr(nla, head, len, rem) {
+               if (nla->nla_type != attrtype)
+                       continue;
+               return nla;
+       }
+
+       return NULL;
+}
+
+/**
+ * nla_strlcpy - Copy string attribute payload into a sized buffer
+ * @dst: where to copy the string to
+ * @nla: attribute to copy the string from
+ * @dstsize: size of destination buffer
+ *
+ * Copies at most dstsize - 1 bytes into the destination buffer.
+ * The result is always a valid NUL-terminated string. Unlike
+ * strlcpy the destination buffer is always padded out. Nothing is
+ * written when dstsize is 0.
+ *
+ * Returns the length of the source string, not counting a trailing
+ * NUL byte in the payload.
+ */
+size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
+{
+       size_t srclen = nla_len(nla);
+       char *payload = nla_data(nla);
+       size_t ncopy;
+
+       /* do not count a terminating NUL carried in the payload */
+       if (srclen > 0 && payload[srclen - 1] == '\0')
+               srclen--;
+
+       if (dstsize == 0)
+               return srclen;
+
+       ncopy = srclen;
+       if (ncopy >= dstsize)
+               ncopy = dstsize - 1;
+
+       memset(dst, 0, dstsize);
+       memcpy(dst, payload, ncopy);
+
+       return srclen;
+}
+
+/**
+ * nla_memcpy - Copy a netlink attribute into another memory area
+ * @dest: where to copy to
+ * @src: netlink attribute to copy from
+ * @count: size of the destination area
+ *
+ * Note: The number of bytes copied is limited by the length of
+ *       the attribute's payload.
+ *
+ * Returns the number of bytes copied.
+ */
+int nla_memcpy(void *dest, struct nlattr *src, int count)
+{
+       int ncopy = nla_len(src);
+
+       if (count < ncopy)
+               ncopy = count;
+
+       memcpy(dest, nla_data(src), ncopy);
+
+       return ncopy;
+}
+
+/**
+ * nla_memcmp - Compare an attribute with sized memory area
+ * @nla: netlink attribute
+ * @data: memory area
+ * @size: size of memory area
+ *
+ * Returns the difference between the payload length and @size if they
+ * differ, otherwise the result of memcmp() on the two areas.
+ */
+int nla_memcmp(const struct nlattr *nla, const void *data,
+                            size_t size)
+{
+       int diff = nla_len(nla) - size;
+
+       if (diff != 0)
+               return diff;
+
+       return memcmp(nla_data(nla), data, size);
+}
+
+/**
+ * nla_strcmp - Compare a string attribute against a string
+ * @nla: netlink string attribute
+ * @str: another string
+ *
+ * The comparison covers the terminating NUL of @str, so the payload
+ * must be exactly strlen(str) + 1 bytes long to compare equal.
+ *
+ * Returns the length difference if the lengths differ, otherwise the
+ * result of memcmp().
+ */
+int nla_strcmp(const struct nlattr *nla, const char *str)
+{
+       int len = strlen(str) + 1;
+       int diff = nla_len(nla) - len;
+
+       return diff ? diff : memcmp(nla_data(nla), str, len);
+}
+
+/**
+ * __nla_reserve - reserve room for attribute on the skb
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ *
+ * Adds a netlink attribute header to a socket buffer and reserves
+ * room for the payload but does not copy it.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute header and payload.
+ *
+ * Returns a pointer to the newly added attribute header.
+ */
+struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
+{
+       struct nlattr *nla;
+
+       nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
+       nla->nla_type = attrtype;
+       nla->nla_len = nla_attr_size(attrlen);
+
+       /* zero the padding bytes that follow the header and payload */
+       memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
+
+       return nla;
+}
+
+/**
+ * nla_reserve - reserve room for attribute on the skb
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ *
+ * Checked variant of __nla_reserve(): adds a netlink attribute header
+ * to a socket buffer and reserves room for the payload but does not
+ * copy it.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute header and payload.
+ */
+struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
+{
+       if (likely(skb_tailroom(skb) >= nla_total_size(attrlen)))
+               return __nla_reserve(skb, attrtype, attrlen);
+
+       return NULL;
+}
+
+/**
+ * __nla_put - Add a netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Reserves the attribute via __nla_reserve() and copies @attrlen bytes
+ * from @data into its payload.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute header and payload.
+ */
+void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
+                            const void *data)
+{
+       struct nlattr *nla;
+
+       nla = __nla_reserve(skb, attrtype, attrlen);
+       memcpy(nla_data(nla), data, attrlen);
+}
+
+
+/**
+ * nla_put - Add a netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Checked variant of __nla_put().
+ *
+ * Returns 0 on success, or -1 if the tailroom of the skb is
+ * insufficient to store the attribute header and payload.
+ */
+int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+       if (likely(skb_tailroom(skb) >= nla_total_size(attrlen))) {
+               __nla_put(skb, attrtype, attrlen, data);
+               return 0;
+       }
+
+       return -1;
+}
+
+
+EXPORT_SYMBOL(nla_validate);
+EXPORT_SYMBOL(nla_parse);
+EXPORT_SYMBOL(nla_find);
+EXPORT_SYMBOL(nla_strlcpy);
+EXPORT_SYMBOL(__nla_reserve);
+EXPORT_SYMBOL(nla_reserve);
+EXPORT_SYMBOL(__nla_put);
+EXPORT_SYMBOL(nla_put);
+EXPORT_SYMBOL(nla_memcpy);
+EXPORT_SYMBOL(nla_memcmp);
+EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
new file mode 100644 (file)
index 0000000..287cfcc
--- /dev/null
@@ -0,0 +1,579 @@
+/*
+ * NETLINK      Generic Netlink Family
+ *
+ *             Authors:        Jamal Hadi Salim
+ *                             Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+struct sock *genl_sock = NULL;
+
+static DECLARE_MUTEX(genl_sem); /* serialization of message processing */
+
+/* Acquire genl_sem, the semaphore serializing generic netlink processing. */
+static void genl_lock(void)
+{
+       down(&genl_sem);
+}
+
+/*
+ * Attempt to acquire genl_sem via down_trylock() and hand back its return
+ * value unchanged. genl_rcv() treats a non-zero result as "not acquired".
+ */
+static int genl_trylock(void)
+{
+       return down_trylock(&genl_sem);
+}
+
+/*
+ * Release genl_sem. Afterwards, if the generic netlink socket exists and
+ * its receive queue is not empty, invoke the socket's sk_data_ready
+ * callback. genl_rcv() returns without processing anything when
+ * genl_trylock() fails, so this is presumably what gets messages that
+ * arrived while the semaphore was held looked at again.
+ */
+static void genl_unlock(void)
+{
+       up(&genl_sem);
+
+       if (genl_sock && genl_sock->sk_receive_queue.qlen)
+               genl_sock->sk_data_ready(genl_sock, 0);
+}
+
+#define GENL_FAM_TAB_SIZE      16
+#define GENL_FAM_TAB_MASK      (GENL_FAM_TAB_SIZE - 1)
+
+static struct list_head family_ht[GENL_FAM_TAB_SIZE];
+
+static int genl_ctrl_event(int event, void *data);
+
+/* Map a family id to a bucket index of family_ht: the low bits of the id. */
+static inline unsigned int genl_family_hash(unsigned int id)
+{
+       return id & GENL_FAM_TAB_MASK;
+}
+
+/* Return the list head of the family_ht bucket that @id hashes to. */
+static inline struct list_head *genl_family_chain(unsigned int id)
+{
+       return &family_ht[genl_family_hash(id)];
+}
+
+/*
+ * Look up a registered family by its numeric id. Only the hash chain the
+ * id maps to is searched. Returns the family or NULL if none matches.
+ */
+static struct genl_family *genl_family_find_byid(unsigned int id)
+{
+       struct list_head *chain = genl_family_chain(id);
+       struct genl_family *fam;
+
+       list_for_each_entry(fam, chain, family_list) {
+               if (fam->id == id)
+                       return fam;
+       }
+
+       return NULL;
+}
+
+/*
+ * Look up a registered family by name. Families are hashed by id, not by
+ * name, so every chain of the table is walked. Returns the first family
+ * whose name compares equal, or NULL if there is none.
+ */
+static struct genl_family *genl_family_find_byname(char *name)
+{
+       struct genl_family *fam;
+       int bucket;
+
+       for (bucket = 0; bucket < GENL_FAM_TAB_SIZE; bucket++) {
+               list_for_each_entry(fam, genl_family_chain(bucket),
+                                   family_list) {
+                       if (!strcmp(fam->name, name))
+                               return fam;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Find the operations registered for command @cmd in @family.
+ * Returns the matching genl_ops or NULL if the command is unknown.
+ */
+static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
+{
+       struct genl_ops *cur;
+
+       list_for_each_entry(cur, &family->ops_list, ops_list) {
+               if (cur->cmd == cmd)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/*
+ * Pick an unused family id in the range (GENL_MIN_ID, GENL_MAX_ID].
+ *
+ * The search continues from the id handed out last (kept in a static
+ * counter) and wraps around once. Returns the new id, or 0 if a full
+ * pass over the range found every id in use.
+ *
+ * Note: the wrap-around must restart the scan explicitly. A "continue"
+ * inside a do/while loop jumps to the loop condition, which would look
+ * up id 0; no family is ever registered with id 0, so the lookup would
+ * fail and 0 would be returned as if the id space were exhausted.
+ */
+static inline u16 genl_generate_id(void)
+{
+       static u16 id_gen_idx;
+       int overflowed = 0;
+
+       for (;;) {
+               if (id_gen_idx == 0)
+                       id_gen_idx = GENL_MIN_ID;
+
+               if (++id_gen_idx > GENL_MAX_ID) {
+                       /* Second overflow: the whole range is taken. */
+                       if (overflowed)
+                               return 0;
+
+                       overflowed = 1;
+                       id_gen_idx = 0;
+                       continue;
+               }
+
+               if (!genl_family_find_byid(id_gen_idx))
+                       return id_gen_idx;
+       }
+}
+
+/**
+ * genl_register_ops - register generic netlink operations
+ * @family: generic netlink family
+ * @ops: operations to be registered
+ *
+ * Registers the specified operations and assigns them to the specified
+ * family. Either a doit or dumpit callback must be specified or the
+ * operation will fail. Only one operation structure per command
+ * identifier may be registered.
+ *
+ * See include/net/genetlink.h for more documentation on the operations
+ * structure.
+ *
+ * Returns 0 on success, -EINVAL if neither callback is set, or -EEXIST
+ * if the command identifier is already registered for the family.
+ */
+int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
+{
+       int err = -EINVAL;
+
+       if (ops->dumpit == NULL && ops->doit == NULL)
+               goto errout;
+
+       /*
+        * The duplicate check walks family->ops_list, so it has to run
+        * under the same lock as the insertion; otherwise it races with
+        * concurrent (un)registration of operations.
+        */
+       genl_lock();
+       if (genl_get_cmd(ops->cmd, family)) {
+               genl_unlock();
+               err = -EEXIST;
+               goto errout;
+       }
+
+       list_add_tail(&ops->ops_list, &family->ops_list);
+       genl_unlock();
+
+       genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
+       err = 0;
+errout:
+       return err;
+}
+
+/**
+ * genl_unregister_ops - unregister generic netlink operations
+ * @family: generic netlink family
+ * @ops: operations to be unregistered
+ *
+ * Removes @ops from the operations list of @family. The removal is done
+ * with the genl lock held, so it waits for message processing that is
+ * currently in progress and keeps new processing from starting until
+ * the list has been updated.
+ *
+ * Note: It is not necessary to unregister all operations before
+ *       unregistering the family, unregistering the family will cause
+ *       all assigned operations to be unregistered automatically.
+ *
+ * Returns 0 on success or -ENOENT if @ops is not registered with @family.
+ */
+int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
+{
+       struct genl_ops *cur;
+       int found = 0;
+
+       genl_lock();
+       list_for_each_entry(cur, &family->ops_list, ops_list) {
+               if (cur != ops)
+                       continue;
+
+               list_del(&ops->ops_list);
+               found = 1;
+               break;
+       }
+       genl_unlock();
+
+       if (!found)
+               return -ENOENT;
+
+       /* Notification is sent only after the lock has been dropped. */
+       genl_ctrl_event(CTRL_CMD_DELOPS, ops);
+
+       return 0;
+}
+
+/**
+ * genl_register_family - register a generic netlink family
+ * @family: generic netlink family
+ *
+ * Registers the specified family after validating it first. Only one
+ * family may be registered with the same family name or identifier.
+ * The family id may equal GENL_ID_GENERATE causing a unique id to
+ * be automatically generated and assigned.
+ *
+ * On failure nothing is left behind: the genl lock is released, the
+ * reference taken on family->owner is dropped again and family->id
+ * keeps the value passed in by the caller.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int genl_register_family(struct genl_family *family)
+{
+       u16 newid = 0;
+       int err = -EINVAL;
+
+       if (family->id && family->id < GENL_MIN_ID)
+               goto errout;
+
+       if (family->id > GENL_MAX_ID)
+               goto errout;
+
+       INIT_LIST_HEAD(&family->ops_list);
+
+       genl_lock();
+
+       if (genl_family_find_byname(family->name)) {
+               err = -EEXIST;
+               goto errout_locked;
+       }
+
+       if (genl_family_find_byid(family->id)) {
+               err = -EEXIST;
+               goto errout_locked;
+       }
+
+       if (!try_module_get(family->owner)) {
+               err = -EBUSY;
+               goto errout_locked;
+       }
+
+       /* From here on every failure must drop the module reference. */
+
+       if (family->id == GENL_ID_GENERATE) {
+               newid = genl_generate_id();
+               if (!newid) {
+                       err = -ENOMEM;
+                       goto errout_put;
+               }
+       }
+
+       if (family->maxattr) {
+               family->attrbuf = kmalloc((family->maxattr+1) *
+                                       sizeof(struct nlattr *), GFP_KERNEL);
+               if (family->attrbuf == NULL) {
+                       err = -ENOMEM;
+                       /* Still holding the genl lock: unwind through it. */
+                       goto errout_put;
+               }
+       } else
+               family->attrbuf = NULL;
+
+       /* Commit the generated id only once nothing can fail anymore. */
+       if (newid)
+               family->id = newid;
+
+       list_add_tail(&family->family_list, genl_family_chain(family->id));
+       genl_unlock();
+
+       genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
+
+       return 0;
+
+errout_put:
+       module_put(family->owner);
+errout_locked:
+       genl_unlock();
+errout:
+       return err;
+}
+
+/**
+ * genl_unregister_family - unregister generic netlink family
+ * @family: generic netlink family
+ *
+ * Searches the hash chain of family->id for an entry with the same id
+ * and name, unlinks it and resets the family's operations list. Once
+ * the genl lock is released the module reference on family->owner is
+ * dropped, the attribute buffer is freed and a CTRL_CMD_DELFAMILY
+ * event is generated.
+ *
+ * Returns 0 on success or -ENOENT if the family is not registered.
+ */
+int genl_unregister_family(struct genl_family *family)
+{
+       struct genl_family *cur;
+       int found = 0;
+
+       genl_lock();
+       list_for_each_entry(cur, genl_family_chain(family->id), family_list) {
+               if (family->id == cur->id &&
+                   strcmp(cur->name, family->name) == 0) {
+                       list_del(&cur->family_list);
+                       INIT_LIST_HEAD(&family->ops_list);
+                       found = 1;
+                       break;
+               }
+       }
+       genl_unlock();
+
+       if (!found)
+               return -ENOENT;
+
+       module_put(family->owner);
+       kfree(family->attrbuf);
+       genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
+
+       return 0;
+}
+
+/*
+ * Process one generic netlink message; passed to netlink_run_queue() by
+ * genl_rcv().
+ *
+ * Return/errp protocol as implemented below:
+ *  - messages without NLM_F_REQUEST or with a type below NLMSG_MIN_TYPE
+ *    are ignored: returns 0, *errp is not written;
+ *  - validation failures: returns -1 with *errp set to the error code;
+ *  - dump requests: returns -1 with *errp set to the result of
+ *    netlink_dump_start() (0 on success);
+ *  - everything else: returns the result of the doit callback, which is
+ *    also stored in *errp.
+ * NOTE(review): how netlink_run_queue() reacts to each combination is not
+ * visible here - confirm against its implementation.
+ */
+static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+                              int *errp)
+{
+       struct genl_ops *ops;
+       struct genl_family *family;
+       struct genl_info info;
+       struct genlmsghdr *hdr = nlmsg_data(nlh);
+       int hdrlen, err = -EINVAL;
+
+       if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+               goto ignore;
+
+       if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+               goto ignore;
+
+       /* The netlink message type doubles as the family id. */
+               family = genl_family_find_byid(nlh->nlmsg_type);
+       if (family == NULL) {
+               err = -ENOENT;
+               goto errout;
+       }
+
+       /* Message must hold the genl header plus the family's own header;
+        * hdr is only dereferenced after this check (err is still -EINVAL). */
+       hdrlen = GENL_HDRLEN + family->hdrsize;
+       if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+               goto errout;
+
+       ops = genl_get_cmd(hdr->cmd, family);
+       if (ops == NULL) {
+               err = -EOPNOTSUPP;
+               goto errout;
+       }
+
+       /* Operations flagged GENL_ADMIN_PERM need to pass the security check. */
+       if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) {
+               err = -EPERM;
+               goto errout;
+       }
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               if (ops->dumpit == NULL) {
+                       err = -EOPNOTSUPP;
+                       goto errout;
+               }
+
+               *errp = err = netlink_dump_start(genl_sock, skb, nlh,
+                                                ops->dumpit, NULL);
+               /* Dump started: consume this message from the skb here. */
+               if (err == 0)
+                       skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
+                                         skb->len));
+               return -1;
+       }
+
+       if (ops->doit == NULL) {
+               err = -EOPNOTSUPP;
+               goto errout;
+       }
+
+       /* Attributes are parsed into the family's single shared attrbuf;
+        * families registered with maxattr == 0 have none and skip this. */
+       if (family->attrbuf) {
+               err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
+                                 ops->policy);
+               if (err < 0)
+                       goto errout;
+       }
+
+       info.snd_seq = nlh->nlmsg_seq;
+       info.snd_pid = NETLINK_CB(skb).pid;
+       info.nlhdr = nlh;
+       info.genlhdr = nlmsg_data(nlh);
+       info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
+       info.attrs = family->attrbuf;
+
+       *errp = err = ops->doit(skb, &info);
+       return err;
+
+ignore:
+       return 0;
+
+errout:
+       *errp = err;
+       return -1;
+}
+
+/*
+ * Input callback of the generic netlink socket (registered through
+ * netlink_kernel_create() in genl_init()). Runs the receive queue through
+ * genl_rcv_msg() with genl_sem held, repeating while netlink_run_queue()
+ * leaves qlen non-zero and the genl socket still has queued messages.
+ */
+static void genl_rcv(struct sock *sk, int len)
+{
+       unsigned int qlen = 0;
+
+       do {
+               /*
+                * Never wait for the semaphore here. If it is taken, bail
+                * out; genl_unlock() calls sk_data_ready again when the
+                * receive queue is not empty.
+                */
+               if (genl_trylock())
+                       return;
+               netlink_run_queue(sk, &qlen, &genl_rcv_msg);
+               genl_unlock();
+       } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen);
+}
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+/*
+ * Append one controller message describing @family to @skb: a generic
+ * netlink header for GENL_ID_CTRL with command @cmd and the family's
+ * version, followed by the CTRL_ATTR_FAMILY_NAME and CTRL_ATTR_FAMILY_ID
+ * attributes.
+ *
+ * Returns the result of genlmsg_end() on success, -1 if the header could
+ * not be added, or the result of genlmsg_cancel() if an attribute did
+ * not fit.
+ */
+static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
+                         u32 flags, struct sk_buff *skb, u8 cmd)
+{
+       void *hdr;
+
+       hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
+                         family->version);
+       if (hdr == NULL)
+               return -1;
+
+       /* The NLA_PUT_* macros are expected to jump to nla_put_failure when
+        * the attribute cannot be added - there is no explicit goto here. */
+       NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
+       NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
+
+       return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+       return genlmsg_cancel(skb, hdr);
+}
+
+/*
+ * Dump callback of CTRL_CMD_GETFAMILY: emit one CTRL_CMD_NEWFAMILY
+ * message (flagged NLM_F_MULTI) per registered family into @skb.
+ *
+ * Resume state is kept in @cb: args[0] is the hash chain to continue
+ * with, args[1] the 1-based position within that chain of the family to
+ * emit next. Returns skb->len.
+ */
+static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
+{
+
+       int i, n = 0;
+       struct genl_family *rt;
+       int chains_to_skip = cb->args[0];
+       int fams_to_skip = cb->args[1];
+
+       for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+               if (i < chains_to_skip)
+                       continue;
+               n = 0;
+               list_for_each_entry(rt, genl_family_chain(i), family_list) {
+                       /* n counts from 1, so the family at position
+                        * fams_to_skip (the one that did not fit last
+                        * time) is not skipped but emitted again. */
+                       if (++n < fams_to_skip)
+                               continue;
+                       if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
+                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                          skb, CTRL_CMD_NEWFAMILY) < 0)
+                               goto errout;
+               }
+
+               /* The in-chain offset only applies to the first chain visited. */
+               fams_to_skip = 0;
+       }
+
+errout:
+       /* Also reached when the loop completes: i is then GENL_FAM_TAB_SIZE,
+        * so a following call skips every chain and adds nothing. */
+       cb->args[0] = i;
+       cb->args[1] = n;
+
+       return skb->len;
+}
+
+/*
+ * Allocate a new message of NLMSG_GOODSIZE and fill it with a controller
+ * message describing @family (see ctrl_fill_info()).
+ *
+ * Returns the skb on success; ERR_PTR(-ENOBUFS) if the allocation fails,
+ * or ERR_PTR() of the ctrl_fill_info() error, in which case the skb has
+ * already been freed.
+ */
+static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
+                                     int seq, int cmd)
+{
+       struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE);
+       int rc;
+
+       if (!msg)
+               return ERR_PTR(-ENOBUFS);
+
+       rc = ctrl_fill_info(family, pid, seq, 0, msg, cmd);
+       if (rc >= 0)
+               return msg;
+
+       nlmsg_free(msg);
+       return ERR_PTR(rc);
+}
+
+/*
+ * Attribute policy of the controller family, referenced by genl_ctrl_ops:
+ * the family id is declared as a u16, the family name as a string.
+ */
+static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
+       [CTRL_ATTR_FAMILY_ID]   = { .type = NLA_U16 },
+       [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING },
+};
+
+/*
+ * doit handler of CTRL_CMD_GETFAMILY: look up a family by id and/or by
+ * name and unicast a CTRL_CMD_NEWFAMILY message describing it back to
+ * the sender. If both attributes are present, the lookup by name decides.
+ *
+ * Returns -EINVAL if the name does not fit into GENL_NAMSIZ bytes,
+ * -ENOENT if no family was found, the error from building the reply, or
+ * the result of genlmsg_unicast().
+ */
+static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
+{
+       struct genl_family *family = NULL;
+       struct sk_buff *reply;
+
+       if (info->attrs[CTRL_ATTR_FAMILY_ID])
+               family = genl_family_find_byid(
+                               nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]));
+
+       if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
+               char name[GENL_NAMSIZ];
+
+               /* A result >= GENL_NAMSIZ means the name was too long. */
+               if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
+                               GENL_NAMSIZ) >= GENL_NAMSIZ)
+                       return -EINVAL;
+
+               family = genl_family_find_byname(name);
+       }
+
+       if (!family)
+               return -ENOENT;
+
+       reply = ctrl_build_msg(family, info->snd_pid, info->snd_seq,
+                              CTRL_CMD_NEWFAMILY);
+       if (IS_ERR(reply))
+               return PTR_ERR(reply);
+
+       return genlmsg_unicast(reply, info->snd_pid);
+}
+
+/*
+ * Announce a controller event. Only CTRL_CMD_NEWFAMILY and
+ * CTRL_CMD_DELFAMILY produce a message (with @data being the family);
+ * it is multicast to group GENL_ID_CTRL. All other events, and any
+ * event raised before the genl socket exists, are silently accepted.
+ *
+ * Returns 0, or the error from building the message.
+ */
+static int genl_ctrl_event(int event, void *data)
+{
+       struct sk_buff *msg;
+
+       if (!genl_sock)
+               return 0;
+
+       if (event != CTRL_CMD_NEWFAMILY && event != CTRL_CMD_DELFAMILY)
+               return 0;
+
+       msg = ctrl_build_msg(data, 0, 0, event);
+       if (IS_ERR(msg))
+               return PTR_ERR(msg);
+
+       genlmsg_multicast(msg, 0, GENL_ID_CTRL);
+
+       return 0;
+}
+
+/*
+ * The controller's only operation: CTRL_CMD_GETFAMILY, answered for a
+ * single family by ctrl_getfamily() and as a dump by ctrl_dumpfamily().
+ */
+static struct genl_ops genl_ctrl_ops = {
+       .cmd            = CTRL_CMD_GETFAMILY,
+       .doit           = ctrl_getfamily,
+       .dumpit         = ctrl_dumpfamily,
+       .policy         = ctrl_policy,
+};
+
+/*
+ * The controller family itself ("nlctrl"), registered with the fixed id
+ * GENL_ID_CTRL by genl_init().
+ */
+static struct genl_family genl_ctrl = {
+       .id = GENL_ID_CTRL,
+       .name = "nlctrl",
+       .version = 0x1,
+       .maxattr = CTRL_ATTR_MAX,
+       .owner = THIS_MODULE,
+};
+
+/*
+ * Subsystem initialization: set up the family hash table, register the
+ * controller family together with its operation and create the
+ * NETLINK_GENERIC kernel socket. Any failure ends in panic().
+ */
+static int __init genl_init(void)
+{
+       int bucket, err;
+
+       for (bucket = 0; bucket < GENL_FAM_TAB_SIZE; bucket++)
+               INIT_LIST_HEAD(&family_ht[bucket]);
+
+       err = genl_register_family(&genl_ctrl);
+       if (err >= 0) {
+               err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops);
+               /* Undo the family registration before giving up. */
+               if (err < 0)
+                       genl_unregister_family(&genl_ctrl);
+       }
+
+       if (err < 0) {
+               panic("GENL: Cannot register controller: %d\n", err);
+               return err;
+       }
+
+       netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
+       genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
+                                         genl_rcv, THIS_MODULE);
+       if (!genl_sock) {
+               panic("GENL: Cannot initialize generic netlink\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+subsys_initcall(genl_init);
+
+EXPORT_SYMBOL(genl_sock);
+EXPORT_SYMBOL(genl_register_ops);
+EXPORT_SYMBOL(genl_unregister_ops);
+EXPORT_SYMBOL(genl_register_family);
+EXPORT_SYMBOL(genl_unregister_family);
index c35336a0f71b5c50cca286fc2eca45043d205807..0cdd9a07e043714615a42d448bd79bbdf8eed514 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/string.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
@@ -26,6 +25,7 @@
 #include <linux/security.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
+#include <net/netlink.h>
 #include <asm/uaccess.h>
 
 static struct sock *xfrm_nl;
@@ -948,11 +948,6 @@ static struct xfrm_link {
        [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy  },
 };
 
-static int xfrm_done(struct netlink_callback *cb)
-{
-       return 0;
-}
-
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 {
        struct rtattr *xfrma[XFRMA_MAX];
@@ -984,20 +979,15 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
        if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
             type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
            (nlh->nlmsg_flags & NLM_F_DUMP)) {
-               u32 rlen;
-
                if (link->dump == NULL)
                        goto err_einval;
 
                if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
-                                               link->dump,
-                                               xfrm_done)) != 0) {
+                                               link->dump, NULL)) != 0) {
                        return -1;
                }
-               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-               if (rlen > skb->len)
-                       rlen = skb->len;
-               skb_pull(skb, rlen);
+
+               netlink_queue_skip(nlh, skb);
                return -1;
        }
 
@@ -1032,60 +1022,13 @@ err_einval:
        return -1;
 }
 
-static int xfrm_user_rcv_skb(struct sk_buff *skb)
-{
-       int err;
-       struct nlmsghdr *nlh;
-
-       while (skb->len >= NLMSG_SPACE(0)) {
-               u32 rlen;
-
-               nlh = (struct nlmsghdr *) skb->data;
-               if (nlh->nlmsg_len < sizeof(*nlh) ||
-                   skb->len < nlh->nlmsg_len)
-                       return 0;
-               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-               if (rlen > skb->len)
-                       rlen = skb->len;
-               if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
-                       if (err == 0)
-                               return -1;
-                       netlink_ack(skb, nlh, err);
-               } else if (nlh->nlmsg_flags & NLM_F_ACK)
-                       netlink_ack(skb, nlh, 0);
-               skb_pull(skb, rlen);
-       }
-
-       return 0;
-}
-
 static void xfrm_netlink_rcv(struct sock *sk, int len)
 {
-       unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+       unsigned int qlen = 0;
 
        do {
-               struct sk_buff *skb;
-
                down(&xfrm_cfg_sem);
-
-               if (qlen > skb_queue_len(&sk->sk_receive_queue))
-                       qlen = skb_queue_len(&sk->sk_receive_queue);
-
-               for (; qlen; qlen--) {
-                       skb = skb_dequeue(&sk->sk_receive_queue);
-                       if (xfrm_user_rcv_skb(skb)) {
-                               if (skb->len)
-                                       skb_queue_head(&sk->sk_receive_queue,
-                                                      skb);
-                               else {
-                                       kfree_skb(skb);
-                                       qlen--;
-                               }
-                               break;
-                       }
-                       kfree_skb(skb);
-               }
-
+               netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg);
                up(&xfrm_cfg_sem);
 
        } while (qlen);