]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
[PATCH] ipvs: ip_vs_ftp breaks connections using persistence
authorJulian Anastasov <ja@ssi.bg>
Mon, 3 Oct 2005 23:27:18 +0000 (16:27 -0700)
committerChris Wright <chrisw@osdl.org>
Mon, 3 Oct 2005 23:27:18 +0000 (16:27 -0700)
ip_vs_ftp when loaded can create NAT connections with unknown
client port for passive FTP. For such expectations we lookup with
cport=0 on incoming packet but it matches the format of the persistence
templates causing packets to other persistent virtual servers to be
forwarded to real server without creating connection. Later the
reply packets are treated as foreign and not SNAT-ed.

If the IPVS box serves both FTP and other services (eg. HTTP)
for the time we wait for first packet for the FTP data connections with
unknown client port (there can be many), other HTTP connections
that have nothing common to the FTP conn break, i.e. HTTP client
sends SYN to the virtual IP but the SYN+ACK is not NAT-ed properly
in IPVS box and the client box returns RST to real server IP. I.e.
the result can be 10% broken HTTP traffic if 10% of the time
there are passive FTP connections in connecting state. It hurts
only IPVS connections.

This patch changes the connection lookup for packets from
clients:

* introduce IP_VS_CONN_F_TEMPLATE connection flag to mark the
connection as template
* create new connection lookup function just for templates - ip_vs_ct_in_get
* make sure ip_vs_conn_in_get hits only connections with
IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. By this way
we avoid returning template when looking for cport=0 (ftp)

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Chris Wright <chrisw@osdl.org>
include/net/ip_vs.h
net/ipv4/ipvs/ip_vs_conn.c
net/ipv4/ipvs/ip_vs_core.c
net/ipv4/ipvs/ip_vs_sync.c

index 52da5d26617a2989f4c0c7a0c017b22388582b77..81329b55f2e640a183688c494fd0356263a719bf 100644 (file)
@@ -84,6 +84,7 @@
 #define IP_VS_CONN_F_IN_SEQ    0x0400          /* must do input seq adjust */
 #define IP_VS_CONN_F_SEQ_MASK  0x0600          /* in/out sequence mask */
 #define IP_VS_CONN_F_NO_CPORT  0x0800          /* no client port set yet */
+#define IP_VS_CONN_F_TEMPLATE  0x1000          /* template, not connection */
 
 /* Move it to better place one day, for now keep it unique */
 #define NFC_IPVS_PROPERTY      0x10000
@@ -740,6 +741,8 @@ enum {
 
 extern struct ip_vs_conn *ip_vs_conn_in_get
 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
+extern struct ip_vs_conn *ip_vs_ct_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
 extern struct ip_vs_conn *ip_vs_conn_out_get
 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
 
index d0145a8b1551765b2cb31049d256e2546f1d91d1..c29f3378f73660f2ba7fb6ca24df6176c3aafe09 100644 (file)
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (s_addr==cp->caddr && s_port==cp->cport &&
                    d_port==cp->vport && d_addr==cp->vaddr &&
+                   ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
                    protocol==cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get
        return cp;
 }
 
+/* Get reference to connection template */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+       unsigned hash;
+       struct ip_vs_conn *cp;
+
+       hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+
+       ct_read_lock(hash);
+
+       list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+               if (s_addr==cp->caddr && s_port==cp->cport &&
+                   d_port==cp->vport && d_addr==cp->vaddr &&
+                   cp->flags & IP_VS_CONN_F_TEMPLATE &&
+                   protocol==cp->protocol) {
+                       /* HIT */
+                       atomic_inc(&cp->refcnt);
+                       goto out;
+               }
+       }
+       cp = NULL;
+
+  out:
+       ct_read_unlock(hash);
+
+       IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+                 ip_vs_proto_name(protocol),
+                 NIPQUAD(s_addr), ntohs(s_port),
+                 NIPQUAD(d_addr), ntohs(d_port),
+                 cp?"hit":"not hit");
+
+       return cp;
+}
 
 /*
  *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
                  atomic_read(&dest->refcnt));
 
        /* Update the connection counters */
-       if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+       if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
                /* It is a normal connection, so increase the inactive
                   connection counter because it is in TCP SYNRECV
                   state (inactive) or other protocol inacive state */
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
                  atomic_read(&dest->refcnt));
 
        /* Update the connection counters */
-       if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+       if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
                /* It is a normal connection, so decrease the inactconns
                   or activeconns counter */
                if (cp->flags & IP_VS_CONN_F_INACTIVE) {
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void)
                ct_write_lock_bh(hash);
 
                list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-                       if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
+                       if (cp->flags & IP_VS_CONN_F_TEMPLATE)
                                /* connection template */
                                continue;
 
index 5fb257dd07cb38187e0b01158f4a75e4d4d3cd8a..a0dfb952d15edb7edb8dcd44f83c20172e918be2 100644 (file)
@@ -242,10 +242,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        if (ports[1] == svc->port) {
                /* Check if a template already exists */
                if (svc->port != FTPPORT)
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, ports[1]);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -271,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    iph->daddr,
                                                    ports[1],
                                                    dest->addr, dest->port,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
@@ -297,10 +297,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
                 */
                if (svc->fwmark)
-                       ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+                       ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
                                               htonl(svc->fwmark), 0);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -325,14 +325,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    snet, 0,
                                                    htonl(svc->fwmark), 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
index 574d1f509b46cef77c10171c9ffe26d7bcd7cff2..2e5ced3d80622b77577826da486e721da6bed560 100644 (file)
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
        p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
        for (i=0; i<m->nr_conns; i++) {
+               unsigned flags;
+
                s = (struct ip_vs_sync_conn *)p;
-               cp = ip_vs_conn_in_get(s->protocol,
-                                      s->caddr, s->cport,
-                                      s->vaddr, s->vport);
+               flags = ntohs(s->flags);
+               if (!(flags & IP_VS_CONN_F_TEMPLATE))
+                       cp = ip_vs_conn_in_get(s->protocol,
+                                              s->caddr, s->cport,
+                                              s->vaddr, s->vport);
+               else
+                       cp = ip_vs_ct_in_get(s->protocol,
+                                              s->caddr, s->cport,
+                                              s->vaddr, s->vport);
                if (!cp) {
                        cp = ip_vs_conn_new(s->protocol,
                                            s->caddr, s->cport,
                                            s->vaddr, s->vport,
                                            s->daddr, s->dport,
-                                           ntohs(s->flags), NULL);
+                                           flags, NULL);
                        if (!cp) {
                                IP_VS_ERR("ip_vs_conn_new failed\n");
                                return;
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                } else if (!cp->dest) {
                        /* it is an entry created by the synchronization */
                        cp->state = ntohs(s->state);
-                       cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
+                       cp->flags = flags | IP_VS_CONN_F_HASHED;
                }       /* Note that we don't touch its state and flags
                           if it is a normal entry. */
 
-               if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
+               if (flags & IP_VS_CONN_F_SEQ_MASK) {
                        opt = (struct ip_vs_sync_conn_options *)&s[1];
                        memcpy(&cp->in_seq, opt, sizeof(*opt));
                        p += FULL_CONN_SIZE;