ipvs: fix oops in backup for fwmark conn templates
[pandora-kernel.git] / net / ipv4 / ipvs / ip_vs_sync.c
index c99f2a3..eff54ef 100644 (file)
@@ -72,7 +72,6 @@ struct ip_vs_sync_thread_data {
        int state;
 };
 
-#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
 #define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
 #define FULL_CONN_SIZE  \
 (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
@@ -284,14 +283,21 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
        struct ip_vs_sync_conn *s;
        struct ip_vs_sync_conn_options *opt;
        struct ip_vs_conn *cp;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_dest *dest;
        char *p;
        int i;
 
+       if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+               IP_VS_ERR_RL("sync message header too short\n");
+               return;
+       }
+
        /* Convert size back to host byte order */
        m->size = ntohs(m->size);
 
        if (buflen != m->size) {
-               IP_VS_ERR("bogus message\n");
+               IP_VS_ERR_RL("bogus sync message size\n");
                return;
        }
 
@@ -304,10 +310,50 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
        p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
        for (i=0; i<m->nr_conns; i++) {
-               unsigned flags;
+               unsigned flags, state;
+
+               if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+                       IP_VS_ERR_RL("bogus conn in sync message\n");
+                       return;
+               }
+               s = (struct ip_vs_sync_conn *) p;
+               flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+               flags &= ~IP_VS_CONN_F_HASHED;
+               if (flags & IP_VS_CONN_F_SEQ_MASK) {
+                       opt = (struct ip_vs_sync_conn_options *)&s[1];
+                       p += FULL_CONN_SIZE;
+                       if (p > buffer+buflen) {
+                               IP_VS_ERR_RL("bogus conn options in sync message\n");
+                               return;
+                       }
+               } else {
+                       opt = NULL;
+                       p += SIMPLE_CONN_SIZE;
+               }
+
+               state = ntohs(s->state);
+               if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+                       pp = ip_vs_proto_get(s->protocol);
+                       if (!pp) {
+                               IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+                                       s->protocol);
+                               continue;
+                       }
+                       if (state >= pp->num_states) {
+                               IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+                                       pp->name, state);
+                               continue;
+                       }
+               } else {
+                       /* protocol in templates is not used for state/timeout */
+                       pp = NULL;
+                       if (state > 0) {
+                               IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+                                       state);
+                               state = 0;
+                       }
+               }
 
-               s = (struct ip_vs_sync_conn *)p;
-               flags = ntohs(s->flags);
                if (!(flags & IP_VS_CONN_F_TEMPLATE))
                        cp = ip_vs_conn_in_get(s->protocol,
                                               s->caddr, s->cport,
@@ -317,38 +363,69 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                                               s->caddr, s->cport,
                                               s->vaddr, s->vport);
                if (!cp) {
+                       /*
+                        * Find the appropriate destination for the connection.
+                        * If it is not found the connection will remain unbound
+                        * but still handled.
+                        */
+                       dest = ip_vs_find_dest(s->daddr, s->dport,
+                                              s->vaddr, s->vport,
+                                              s->protocol);
+                       /*  Set the appropriate activity flag */
+                       if (s->protocol == IPPROTO_TCP) {
+                               if (state != IP_VS_TCP_S_ESTABLISHED)
+                                       flags |= IP_VS_CONN_F_INACTIVE;
+                               else
+                                       flags &= ~IP_VS_CONN_F_INACTIVE;
+                       }
                        cp = ip_vs_conn_new(s->protocol,
                                            s->caddr, s->cport,
                                            s->vaddr, s->vport,
                                            s->daddr, s->dport,
-                                           flags, NULL);
+                                           flags, dest);
+                       if (dest)
+                               atomic_dec(&dest->refcnt);
                        if (!cp) {
                                IP_VS_ERR("ip_vs_conn_new failed\n");
                                return;
                        }
-                       cp->state = ntohs(s->state);
                } else if (!cp->dest) {
-                       /* it is an entry created by the synchronization */
-                       cp->state = ntohs(s->state);
-                       cp->flags = flags | IP_VS_CONN_F_HASHED;
-               }       /* Note that we don't touch its state and flags
-                          if it is a normal entry. */
+                       dest = ip_vs_try_bind_dest(cp);
+                       if (dest)
+                               atomic_dec(&dest->refcnt);
+               } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+                          (cp->state != state)) {
+                       /* update active/inactive flag for the connection */
+                       dest = cp->dest;
+                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                               (state != IP_VS_TCP_S_ESTABLISHED)) {
+                               atomic_dec(&dest->activeconns);
+                               atomic_inc(&dest->inactconns);
+                               cp->flags |= IP_VS_CONN_F_INACTIVE;
+                       } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                               (state == IP_VS_TCP_S_ESTABLISHED)) {
+                               atomic_inc(&dest->activeconns);
+                               atomic_dec(&dest->inactconns);
+                               cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       }
+               }
 
-               if (flags & IP_VS_CONN_F_SEQ_MASK) {
-                       opt = (struct ip_vs_sync_conn_options *)&s[1];
+               if (opt)
                        memcpy(&cp->in_seq, opt, sizeof(*opt));
-                       p += FULL_CONN_SIZE;
-               } else
-                       p += SIMPLE_CONN_SIZE;
-
                atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-               cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
+               cp->state = state;
+               cp->old_state = cp->state;
+               /*
+                * We cannot recover the right timeout for templates
+                * in all cases, because we cannot find the right fwmark
+                * virtual service. If needed, we can do it for
+                * non-fwmark persistent services.
+                */
+               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+                       cp->timeout = pp->timeout_table[state];
+               else
+                       cp->timeout = (3*60*HZ);
                ip_vs_conn_put(cp);
-
-               if (p > buffer+buflen) {
-                       IP_VS_ERR("bogus message\n");
-                       return;
-               }
        }
 }
 
@@ -877,7 +954,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
        if (!tinfo)
                return -ENOMEM;
 
-       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
+       IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
                  sizeof(struct ip_vs_sync_conn));
 
@@ -917,7 +994,7 @@ int stop_sync_thread(int state)
            (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
                return -ESRCH;
 
-       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
+       IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
        IP_VS_INFO("stopping sync thread %d ...\n",
                   (state == IP_VS_STATE_MASTER) ?
                   sync_master_pid : sync_backup_pid);